1 files changed, 99 insertions, 0 deletions
diff --git a/Code/create_edges.r b/Code/create_edges.r
new file mode 100644
index 0000000..1e80447
--- /dev/null
+++ b/Code/create_edges.r
@@ -0,0 +1,99 @@
+TARGET_TF_FILE <- "../Data/information/target_tf.txt"
+DATA_FILE      <- "../Data/history/expr/TPM.txt" # A TPM table
+AGINAME_FILE   <- "../Data/information/AGI-to-gene-names_v2.txt"
+CORR_THRESHOLD     <- 0.6
+MIN_SIZE           <- 100
+
+####### Read data #########################################
+X             <- read.table(DATA_FILE, header=TRUE, check.names=FALSE)
+gene_id       <- X$gene_id
+X$gene_id     <- NULL
+row.names(X)  <- gene_id
+X             <- as.matrix(X)
+rna.sample.id <- colnames(X)
+
+target_tf  <- read.table(TARGET_TF_FILE, sep='\t', header=FALSE)
+target_tf  <- as.matrix(target_tf)
+targets    <- target_tf[,1]
+tfs        <- target_tf[,2]
+conditions <- target_tf[,3]
+
+agi        <- read.table(AGINAME_FILE, sep='\t', header=FALSE, row.names=1, stringsAsFactors=F) # AGINAME_FILE cannot contain quotes
+#######################################################
+
+library(mixtools)
+result.file <- '../Data/history/edges/many_targets/edges.txt.20170306_1015'
+for (i in 1:length(targets)) {
+    out <- file(result.file, 'a')
+    curr.date <- gsub('-','',Sys.Date())
+    id1 <- tfs[i]
+    id2 <- targets[i]
+    if (id1 %in% gene_id == F || id2 %in% gene_id == F) {
+        next
+    }    
+    name1 <- agi[id1,1]
+    name2 <- agi[id2,1]
+    cond <- conditions[i]
+    x <- X[id1,]
+    y <- X[id2,]
+    x <- log(x+1)
+    y <- log(y+1)
+    index <- x < 0.01 | y < 0.01
+    x <- x[!index]
+    y <- y[!index]
+    r <- cor(x, y)
+    if (abs(r) >= CORR_THRESHOLD) {
+        s = sprintf('%s %s\t%s %s\t%4.2f\t%s\t%s\t%s\t%s\t%s', id2, name2,id1,name1, r, 'all', '.', cond, '.', curr.date)
+        #cat(s, file=result.file, sep='\n', append=T)
+	writeLines(s, con=out)
+        next
+    }
+
+    k <- 3
+    N <- length(x)
+    em.out <- regmixEM(y,x,maxit=100, epsilon=1e-04, k=k)
+
+    pos_r_max   <- -2
+    pos_r_N     <- 0
+    pos_r_index <- c()
+    pos_r_loglik <- -100000000
+
+    neg_r_max   <- 2
+    neg_r_N     <- 0
+    neg_r_index <- c()
+    neg_r_loglik <- -100000000
+
+    for (j in seq(1,k,1)) {
+
+        index <- which(max.col(em.out$posterior) == j)
+        size <- length(index)
+        r <- cor(em.out$x[index,2], em.out$y[index])
+
+        if (!is.na(r) && r >= CORR_THRESHOLD && size >= MIN_SIZE && r > pos_r_max && size > pos_r_N) {
+            pos_r_max <- r
+            pos_r_N   <- size
+            pos_r_index <- index
+            pos_r_loglik <- em.out$loglik
+        }
+        if (!is.na(r) && r <= -CORR_THRESHOLD && size >= MIN_SIZE && r < neg_r_max && size > neg_r_N) {
+            neg_r_max <- r
+            neg_r_N   <- size
+            neg_r_index <- index
+            neg_r_loglik <- em.out$loglik
+        }
+    }
+
+    if (pos_r_max > 0) {
+        sub.cond <- paste(rna.sample.id[pos_r_index], collapse=' ')
+        s = sprintf('%s %s\t%s %s\t%4.2f\t%s\t%s\t%s\tloglik=%4.2f\t%s', id2, name2, id1, name1, pos_r_max, 'mix', sub.cond, cond, pos_r_loglik, curr.date)
+        #cat(s, file=result.file, sep='\n', append=T)
+	writeLines(s, con=out)	
+    }
+    if (neg_r_max < 0) {
+        sub.cond <- paste(rna.sample.id[neg_r_index], collapse=' ')
+        s = sprintf('%s %s\t%s %s\t%4.2f\t%s\t%s\t%s\tloglik=%4.2f\t%s', id2, name2, id1, name1, neg_r_max, 'mix', sub.cond, cond, neg_r_loglik, curr.date)
+        #cat(s, file=result.file, sep='\n', append=T)
+	writeLines(s, con=out)	
+    }
+    close(out)    
+}