From 97fdefab064f63642fa3ece05b807d29b459df31 Mon Sep 17 00:00:00 2001
From: Hui Lan <lanhui@zjnu.edu.cn>
Date: Wed, 4 Dec 2019 19:03:19 +0800
Subject: brain: add python and R code to local repository.

---
 Code/wedge.R | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 138 insertions(+)
 create mode 100644 Code/wedge.R

(limited to 'Code/wedge.R')

diff --git a/Code/wedge.R b/Code/wedge.R
new file mode 100644
index 0000000..50039eb
--- /dev/null
+++ b/Code/wedge.R
@@ -0,0 +1,138 @@
+# Last modified on 7 Agu 2019 by Hui Lan @ Jinhua
+#DATA.FILE      <- '../Data/history/expr/TPM.txt.3130'
+DATA.FILE      <- '../Data/history/expr/TPM.txt'
+TARGET.TF.FILE <- '../Data/information/target_tf.txt'
+AGINAME.FILE   <- '../Data/information/AGI-to-gene-names_v2.txt'
+ONE.TARGET.DIR <- '../Data/history/edges/one_target'
+
+# Make sure we have required files and directory 
+if (! file.exists(DATA.FILE)) {
+   stop(sprintf('[wedge.R] Unable to find %s', DATA.FILE))
+}
+
+if (! file.exists(TARGET.TF.FILE)) {
+   stop(sprintf('[wedge.R] Unable to find %s', TARGET.TF.FILE))
+}
+
+if (! file.exists(AGINAME.FILE)) {
+   stop(sprintf('[wedge.R] Unable to find %s', AGINAME.FILE))
+}
+
+
+if (! dir.exists(ONE.TARGET.DIR)) {
+   stop(sprintf('[wedge.R] Unable to find directory %s', ONE.TARGET.DIR))
+}
+
+
+r.tau        <- 0.60
+
+cat(sprintf('Read %s\n', DATA.FILE))
+X            <- read.table(DATA.FILE, header=TRUE, check.names=FALSE)
+all.id        <- X$gene_id
+X$gene_id     <- NULL   # remove column gene_id
+row.names(X)  <- all.id # add row names
+all.genes     <- rownames(X)
+
+cat(sprintf('Read %s\n', AGINAME.FILE))
+#agi        <- read.table(AGINAME.FILE, sep='\t', header=FALSE, row.names=1, stringsAsFactors=F) # AGINAME_FILE cannot contain quotes
+agi        <- read.table(AGINAME.FILE, stringsAsFactors=F) # AGINAME_FILE cannot contain quotes
+
+cat(sprintf('Read %s\n', TARGET.TF.FILE))
+target.tf <- read.table(TARGET.TF.FILE, header=FALSE, check.names=FALSE, sep='\t')
+total.pair <- dim(target.tf)[1]
+
+###########################################################################
+post.translation.4 <- function(x, y) {
+    mx = mean(x)
+    index = (x > mx - 0.5) & (x < mx + 0.5)
+    slope = max(y[index])/mx
+    v = c(-slope, 1)
+    xy = as.matrix(cbind(x,y))
+    z = xy %*% v
+    index0 = which(z <= 0) # points below the wedge
+    index1 = which(z > 0)  # points above the wedge
+    index2 = which(x <= 0.1) # x has low value, then y is expected to have low value too
+    if (length(index2) > 0) {
+        q = quantile(y[index2], 0.9)
+        m = mean(y[index2])
+    } else {
+        q = 0.0
+        m = 0.0
+    }
+    index3 = which(x < 1)
+    if (length(index3) > 0) {
+        m = mean(y[index3])
+    } else {
+        m = 0.0
+    }
+    # for a scatterplot to be considered a wedge shape, percent>0.90,q < 1 and
+    # m < slope, disp.x < disp.y
+    result <- list(below=index0, upper=index1, percent=length(index0)/length(x), q=q, m=m, slope=slope, disp.x=sd(x)/mean(x), disp.y=sd(y)/mean(y))
+}
+
+
+make.data <- function(slope, n) {
+    x=abs(3.0 + 1*rnorm(n))
+    y=abs(3.0 + 1*rnorm(n))
+    v = c(-slope, 1)
+    xy = as.matrix(cbind(x,y))
+    z = xy %*% v
+    index = which(z <= 0)
+    result <- list(x=x[index], y=y[index])
+}
+###########################################################################
+
+cat(sprintf('Go through pairs looking for wedge shapes ..\n'))
+
+output.file <- paste('../Data/history/edges/one_target/edges.txt', 'wedge', format(Sys.time(), "%b.%d.%Y.%H%M%S"), sep='.')
+f <- file(output.file, 'w')
+	    
+for (i in 1:total.pair) {
+    id1 <- as.vector(target.tf[i,2]) # tf
+    id2 <- as.vector(target.tf[i,1]) # target
+
+    all.in <- id1 %in% all.genes & id2 %in% all.genes
+    if (!all.in) {
+        next
+    }
+    
+    x <- X[id1,]
+    y <- X[id2,]
+    x <- log(x+1)
+    y <- log(y+1)
+    x <- t(x)
+    y <- t(y)
+    na.ratio <- max(sum(is.na(x))/length(x), sum(is.na(y))/length(y))
+    index <- x < 0.01 | y < 0.01 | na.ratio > 0.5 # make sure very small values are not included
+    x <- x[!index, 1, drop=FALSE]
+    y <- y[!index, 1, drop=FALSE]
+
+    if (dim(x)[1] < 50) {
+        next
+    }
+
+    # We will not consider wedge shape if the correlation coefficient is large enough.
+    if (abs(cor(x, y)) < r.tau) {
+        result <- post.translation.4(x, y)
+        if (result$percent > 0.95 & result$q < 0.25 & result$m < 1.0 & result$disp.y > 1.2 * result$disp.x) {
+            #name1 <- agi[id1,1]
+            #name2 <- agi[id2,1]
+	    name1 <- agi$V2[which(agi$V1 == id1)]
+	    name2 <- agi$V2[which(agi$V1 == id2)]	    
+            max.r <- max(r.tau, result$percent * exp(-max(result$q, result$m)))
+            curr.date <- gsub('-','',Sys.Date())
+            loglik <- '-1001.0'
+            rna.sample <- row.names(x)[result$below] # below the diagonal line
+            #rna.sample.size <- length(rna.sample)
+            #rna.sample.2 <- sample(rna.sample, ceiling(rna.sample.size^0.7)) # to save space, keep only a fraction of the rnaseq sample IDs
+            sub.cond <- paste(rna.sample, collapse=' ')
+	    sub.cond.length <- length(rna.sample)
+            cond <- as.vector(target.tf[i,3])
+            result2 <- sprintf('%s %s\t%s %s\t%4.2f\t%s\t%s\t%s\t%s\t%s\t%4.2f\t%s\n', id2, name2, id1, name1, max.r, 'mix', sub.cond.length, cond, loglik, curr.date, max.r, 'wedge')
+
+            cat(result2, file=f, sep='')
+        }
+    }
+}
+
+close(f)
-- 
cgit v1.2.1