From 97fdefab064f63642fa3ece05b807d29b459df31 Mon Sep 17 00:00:00 2001 From: Hui Lan Date: Wed, 4 Dec 2019 19:03:19 +0800 Subject: brain: add python and R code to local repository. --- Code/correlation_per_tissue.R | 101 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 Code/correlation_per_tissue.R (limited to 'Code/correlation_per_tissue.R') diff --git a/Code/correlation_per_tissue.R b/Code/correlation_per_tissue.R new file mode 100644 index 0000000..d9aadf9 --- /dev/null +++ b/Code/correlation_per_tissue.R @@ -0,0 +1,101 @@ +# Last modified on 8 Aug 2019 + +TISSUE.FILE <- '../Data/information/experiment.and.tissue.txt' +DATA.FILE <- '../Data/history/expr/TPM.txt' +TEMP.DIR <- '../Data/temp' +TARGET.FILE <- '../Data/temp/all_targets.txt' +TF.FILE <- '../Data/temp/all_tfs.txt' +tau <- 0.60 + + +# Make sure we have required files +if (! file.exists(TISSUE.FILE)) { + stop(sprintf('[correlation_per_tissue.R] Unable to find %s', TISSUE.FILE)) +} + +if (! file.exists(DATA.FILE)) { + stop(sprintf('[correlation_per_tissue.R] Unable to find %s', DATA.FILE)) +} + +if (! dir.exists(TEMP.DIR)) { + stop(sprintf('[correlation_per_tissue.R] Unable to find directory %s', TEMP.DIR)) +} + +if (! file.exists(TARGET.FILE)) { + stop(sprintf('[correlation_per_tissue.R] Unable to find %s', TARGET.FILE)) +} + +if (! file.exists(TF.FILE)) { + stop(sprintf('[correlation_per_tissue.R] Unable to find %s', TF.FILE)) +} + + +X0 <- read.table(DATA.FILE, header=TRUE, check.names=FALSE) +all.id <- X0$gene_id +X0$gene_id <- NULL # remove column gene_id +row.names(X0) <- all.id # add row names +all_genes <- rownames(X0) + +tissue <- read.table(TISSUE.FILE, header=TRUE, check.names=FALSE, sep='\t') +labels <- as.vector(tissue$suggested.tissue) +labels <- unlist(lapply(labels, function(x) {e<-regexpr("\\.", x)[1]; if (e > 0) {y<-substr(x, 1, e-1)} else {x} })) # remove subcategories +unique.label <- unique(labels) + +targets <- read.table(TARGET.FILE, header=FALSE) +tfs <- read.table(TF.FILE, header=FALSE) +targets <- as.vector(targets$V1) +tfs <- as.vector(tfs$V1) + + +############################################################## +get.index <- function(X, tissue.matrix, tissue.name) { + labels <- as.vector(tissue.matrix$suggested.tissue) + labels <- unlist(lapply(labels, function(x) {e<-regexpr("\\.", x)[1]; if (e > 0) {y<-substr(x, 1, e-1)} else {x} })) # remove subcategories + index <- c() + count <- 1 + for (rseqid in colnames(X)) { + i <- which(as.vector(tissue.matrix$run.id) == rseqid) + if (length(i) != 0) { + suggested.tissue.name <- labels[i] + if (tissue.name == suggested.tissue.name) { + index <- c(index, count) + } + } + count <- count + 1 + } + index +} +############################################################## + + +# for each tissue type, get a correlation matrix +for (ul in unique.label) { + index.rnaseq <- get.index(X0, tissue, ul) + if (length(index.rnaseq) >= 50) { + OUTPUT.FILE <- paste('../Data/temp/edges.txt.simple.correlation.tissue', ul, 'txt', sep='.') + + X <- as.matrix(X0[, index.rnaseq]) + sd.1 <- apply(X, 1, sd) # sd of each row + s0 <- apply(X, 1, function(c) sum(c==0)) # number of zeros in each row + sd.tau <- (quantile(sd.1)[1] + quantile(sd.1)[2]) / 2.0 # min SD + good <- sd.1 > max(sd.tau, 0.05) + tf_good <- which( good & (all_genes %in% tfs) == T ) + target_good <- which( good & (all_genes %in% targets) == T ) + + # Compute correlation coefficient + X <- log(X + 1) + X[X<0.01] <- NA + if (length(tf_good) > 1) { + c <- cor(t(X[target_good,]), t(X[tf_good,]), use='pairwise.complete.obs') + } else { + c <- cor(t(X[target_good,]), t(X[c(tf_good, tf_good), ]), use='pairwise.complete.obs') + } + index <- !is.na(c) & abs(c) >= tau & abs(c) <= 0.99 + row_names <- rownames(c) + col_names <- colnames(c) + result <- data.frame(row = row_names[row(c)[index]], col = col_names[col(c)[index]], r = c[index], tissue=rep(ul, sum(index)), numrnaseqids=rep(length(index.rnaseq), sum(index))) + + # write results + write.table(result, OUTPUT.FILE, col.names=F, row.names=F, sep='\t', quote=F) + } +} -- cgit v1.2.1