# Utility functions
# Purpose: check what new edges have been created today.
# Created by Hui on 20 July 2021

import os
import shutil
import mmap
from datetime import datetime


# Number of tab-separated fields in a well-formed edge line.
_EDGE_FIELD_COUNT = 10


def _edge_key(line):
    ''' Return the 'TARGET_TF' key for an edge line, or None if the line is
    not a usable edge line.

    A usable line has exactly 10 tab-separated fields after surrounding
    whitespace is stripped.  The key is built from the first
    whitespace-separated token of field 1 (target) and of field 2 (TF),
    both upper-cased.  Lines whose target or TF field is blank are skipped
    rather than raising IndexError. '''
    fields = line.strip().split('\t')
    if len(fields) != _EDGE_FIELD_COUNT:
        return None
    target_tokens = fields[0].split()
    tf_tokens = fields[1].split()
    if not target_tokens or not tf_tokens:
        return None
    return target_tokens[0].upper() + '_' + tf_tokens[0].upper()


def _write_new_column_file(basefile, col):
    ''' Create basefile with one item of col per line.  Return the number of
    lines written. '''
    count = 0
    with open(basefile, 'w') as f:
        for x in col:
            f.write(x + '\n')
            count += 1
    return count


def _rewrite_with_column(basefile, lines, col):
    ''' Overwrite basefile with each of lines followed by a tab and the
    matching item of col.  Return the number of lines written.

    Only the line terminator is removed from each existing line; tabs are
    kept so that rows containing empty cells stay aligned. '''
    with open(basefile, 'w') as f:
        f.writelines(line.rstrip('\r\n') + '\t' + x + '\n'
                     for line, x in zip(lines, col))
    return len(lines)


def get_edge_set(fname):
    ''' Return the set of edge keys ('TARGET_TF', upper-cased) found in the
    edge file fname.  Lines that are not usable edge lines are ignored. '''
    result = set()
    with open(fname) as f:
        for line in f:
            key = _edge_key(line)
            if key is not None:
                result.add(key)
    return result


def make_new_edges_file(Sold, Snew, fname_new, output_file):
    ''' Write to output_file the lines of fname_new whose edge key is in Snew
    but not in Sold.

    The output starts with a tab-separated header line and ends with a
    summary line giving the number of new edges and the current time. '''
    Sdiff = Snew.difference(Sold)
    result = []
    with open(fname_new) as f:
        for line in f:
            key = _edge_key(line)
            if key is not None and key in Sdiff:
                # this is a new edge line, keep it
                result.append(line.strip())

    with open(output_file, 'w') as f:
        header = '\t'.join(['target_id target_name', 'source_id source_name', 'score', 'type of score', 'RNA-seqs', 'ChIP-seq', 'Log likelihood', 'Date', 'Strength', 'Inference method']) + '\n'
        f.write(header)
        for line in result:
            f.write(line + '\n')
        f.write('*** Number of newly added edges: %d. Last modification date: %s. ***\n' % (len(result), datetime.now().strftime('%Y-%m-%d %H:%M')))


def make_paths(s):
    ''' Create directory s (and any missing parents) if it does not already
    exist. '''
    # exist_ok avoids the race between checking for the directory and
    # creating it.
    os.makedirs(s, exist_ok=True)


def write_first_column(lst, fname):
    ''' Write each item of lst to fname, one per line, overwriting fname. '''
    with open(fname, 'w') as f:
        for x in lst:
            f.write(x + '\n')


def append_column_fast(basefile, col):
    ''' Append col to basefile.  If basefile does not exist, then create it
    and col will be basefile's first column.

    Return the number of lines written, or None (leaving basefile untouched)
    if basefile exists and its number of lines differs from len(col). '''
    if not os.path.exists(basefile):
        return _write_new_column_file(basefile, col)

    with open(basefile) as f:
        lines = f.readlines()

    if len(lines) != len(col):
        return

    return _rewrite_with_column(basefile, lines, col)


def append_column_fast2(basefile, col):
    ''' Append col to basefile.  If basefile does not exist, then create it
    and col will be basefile's first column.

    Same contract as append_column_fast, but the existing file is read
    through mmap.  Return the number of lines written, or None (leaving
    basefile untouched) if basefile exists and its number of lines differs
    from len(col). '''
    if not os.path.exists(basefile):
        return _write_new_column_file(basefile, col)

    if os.path.getsize(basefile) == 0:
        # An empty file cannot be memory-mapped; it simply has no lines.
        raw_lines = []
    else:
        with open(basefile, 'rb') as f:
            with mmap.mmap(f.fileno(), length=0, access=mmap.ACCESS_READ) as mmap_f:
                raw_lines = mmap_f.read().split(b'\n')
        # A trailing newline yields one empty final piece; drop only that,
        # so a last line without a newline is still counted.
        if raw_lines[-1] == b'':
            raw_lines.pop()

    if len(raw_lines) != len(col):
        return

    lines = [raw.decode() for raw in raw_lines]
    return _rewrite_with_column(basefile, lines, col)


if __name__ == '__main__':
    S2 = get_edge_set('/home/lanhui/brain/Data/temp/edges.txt')
    S1 = get_edge_set('/home/lanhui/brain/Data/temp/edges.txt.old')
    make_new_edges_file(S1, S2, '/home/lanhui/brain/Data/temp/edges.txt', 'mynewedges.txt')