diff options
| -rw-r--r-- | Code/update_network_by_force.py | 116 | 
1 files changed, 54 insertions, 62 deletions
diff --git a/Code/update_network_by_force.py b/Code/update_network_by_force.py
index fae2965..125ca60 100644
--- a/Code/update_network_by_force.py
+++ b/Code/update_network_by_force.py
@@ -44,38 +44,37 @@ def age_of_file_in_seconds(fname):
     return seconds
 
 
+def new_edge_line(line, file_lst):
+    for fname in file_lst:
+        with open(fname) as f:
+            existing_lines = lines_with_10_fields(f.read())
+            if line in existing_lines:
+                return False
+    return True
+
+
 def concatenate_edge_files(fname_lst, dir_out, fname_out):
     fout = open(os.path.join(dir_out, fname_out), 'w')
     for fname in fname_lst:
-        f = open(fname)
-        s = f.read()
-        f.close()
+        with open(fname) as f:
+            s = f.read()
         # Make sure each edge has 10 fields before writing.
         lines = lines_with_10_fields(s)
         if lines != []:
-            write_log_file('[update_network_by_force.py] In function concatenate_edge_files(). File %s has %d rows with 10 columns.' % (fname, len(lines)), UPDATE_NETWORK_LOG_FILE)
             # do not write duplicate lines, to save space
             kept_lines = []
             for line in lines:
-                is_new = True
-                for fn in glob.glob(os.path.join(dir_out, 'edges.txt.*')):
-                    with open(fn) as f2:
-                        existing_lines = lines_with_10_fields(f2.read())
-                    if line in existing_lines:
-                        is_new = False
-                        break
-                if is_new:
+                if new_edge_line(line, glob.glob(os.path.join(dir_out, 'edges.txt.*'))):
                     kept_lines.append(line)
-            fout.write('\n'.join(kept_lines) + '\n')
-        else:
-            write_log_file('[update_network_by_force.py] In function concatenate_edge_files(). Check file %s.  It has no rows with 10 fields.' % (fname), UPDATE_NETWORK_LOG_FILE)
+            if kept_lines != []:
+                fout.write('\n'.join(kept_lines) + '\n')
     fout.close()
 
 
 def delete_edge_files(fname_lst):
     age_in_hours = 6
     for fname in fname_lst:
-        # Before we delete, we should make sure it is not being written. Make sure it is old enough. Otherwise, don't delete.
+        # Before we delete a file, we should make sure it is not being updated. Make sure it is old enough. Otherwise, don't delete.
         if age_of_file_in_seconds(fname) > age_in_hours*60*60: # 6 hours
             os.remove(fname)
         else:
@@ -109,31 +108,26 @@ def summarize_edge_file(fname):
 
 
 ########## Merge edges #######################
-# Update edges.txt, a merged file from two sources, HISTORY_DIR and HISTORY_DIR2. Some new edge files are being generated ...
+# Update edges.txt, a merged file from two sources, HISTORY_DIR and HISTORY_DIR2. Some new edge files are being generated there ...
 # Definition of HISTORY_DIR and HISTORY_DIR2 could be found in configure.py
-time.sleep(3)
-edge_file_lst = [] # collect edge files.
+time.sleep(10)
+edge_file_lst = [] # collect edge files (file names).
 most_recent_edge_modification_time = 0
 
-write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (HISTORY_DIR), UPDATE_NETWORK_LOG_FILE)
-for fname in glob.glob(os.path.join(HISTORY_DIR, 'edges.txt.*')): # many small edges.txt.* are to be merged
-    edge_file_lst.append(fname)
-    if os.path.getmtime(fname) > most_recent_edge_modification_time:
-        most_recent_edge_modification_time = os.path.getmtime(fname)
-
-write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
-for fname in glob.glob(os.path.join(HISTORY_DIR2, 'edges.txt.*')): # many edges.txt.* are to be merged
-    edge_file_lst.append(fname)
-    if os.path.getmtime(fname) > most_recent_edge_modification_time:
-        most_recent_edge_modification_time = os.path.getmtime(fname)
+for history_directory in [HISTORY_DIR, HISTORY_DIR2]:
+    write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (HISTORY_DIR), UPDATE_NETWORK_LOG_FILE)
+    for fname in glob.glob(os.path.join(history_directory, 'edges.txt.*')): # many small edges.txt.* are to be merged
+        edge_file_lst.append(fname)
+        if os.path.getmtime(fname) > most_recent_edge_modification_time:
+            most_recent_edge_modification_time = os.path.getmtime(fname)
 
 if edge_file_lst == []:
     write_log_file('[update_network_by_force.py] No edge files to merge in %s and %s.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
 elif os.path.getmtime(MERGED_EDGE_FILE) < most_recent_edge_modification_time: # update edges.txt only if there are newer edges to add.
-    # concatenate edge files into one
+    # concatenate edge files into one and store in EDGE_POOL_DIR
     write_log_file('[update_network_by_force.py] Concatenate edge files in %s and %s into one file.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
     curr_time = datetime.now().strftime('%Y%m%d_%H%M')
-    concatenate_edge_files(edge_file_lst, EDGE_POOL_DIR, 'edges.txt.many.one.targets.' + curr_time)
+    concatenate_edge_files(edge_file_lst, EDGE_POOL_DIR, 'edges.txt.many.one.targets.' + curr_time) # this will update EDGE_POOL_DIR
     delete_edge_files(edge_file_lst) # delete these files only when they are no longer being written.
 
 if os.path.getmtime(MERGED_EDGE_FILE) < os.path.getmtime(EDGE_POOL_DIR): # edge pool directory has been updated, create a new edges.txt
@@ -141,49 +135,47 @@ if os.path.getmtime(MERGED_EDGE_FILE) < os.path.getmtime(EDGE_POOL_DIR): # edge
     write_log_file('[update_network_by_force.py] Number of lines in the old edges.txt: %d.' % (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
     write_log_file('[update_network_by_force.py] %s' % (summarize_edge_file(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
     Sold = utils.get_edge_set(MERGED_EDGE_FILE) # all old edges stored in a set
-    cmd = 'python3 merge_edges.py'  # invoke another script the merge all edge files in EDGE_POOL_DIR
+    cmd = 'python3 merge_edges.py'  # invoke another script to merge all edge files in EDGE_POOL_DIR
     return_value = os.system(cmd)
     if return_value != 0:
-        write_log_file('[update_network_by_force.py] Something wrong occurred to merge_edges.py.  Perhaps your computer is running out of memory.', UPDATE_NETWORK_LOG_FILE)
+        write_log_file('[update_network_by_force.py] WARNING: something wrong occurred to merge_edges.py.  Perhaps your computer is running out of memory.', UPDATE_NETWORK_LOG_FILE)
     write_log_file('[update_network_by_force.py] Number of lines in the new edges.txt: %d.' % (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
     write_log_file('[update_network_by_force.py] %s' % (summarize_edge_file(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
     Snew = utils.get_edge_set(MERGED_EDGE_FILE) # all new edges stored in a set. Note that MERGED_EDGE_FILE has been updated by 'python3 merge_edges.py'
     utils.make_new_edges_file(Sold, Snew, MERGED_EDGE_FILE, DIFF_EDGE_FILE)
-    manual_copy_commands = 'Please copy files to the web application: sudo cp /home/lanhui/brain/Data/temp/edges.txt /var/www/brain/brain/static/edges/edges.txt sudo cp /home/lanhui/brain/Data/temp/html_edges/edges.sqlite /var/www/brain/brain/static/edges curl http://118.25.96.118/brain/before'
+    manual_copy_commands = 'MANUAL: Please copy edges.txt to the web application: sudo cp /home/lanhui/brain/Data/temp/edges.txt /var/www/brain/brain/static/edges/edges.txt sudo cp /home/lanhui/brain/Data/temp/html_edges/edges.sqlite /var/www/brain/brain/static/edges curl http://118.25.96.118/brain/before'
     write_log_file('[update_network_by_force.py] %s' % (manual_copy_commands), UPDATE_NETWORK_LOG_FILE)
-    write_log_file('[update_network_by_force.py] Make html files for the web application.', UPDATE_NETWORK_LOG_FILE)
+    write_log_file('[update_network_by_force.py] Make HTML files for the web application.', UPDATE_NETWORK_LOG_FILE)
     cmd = 'python3 html_network.py -f %s -r %s -c %s -n %s' % (MERGED_EDGE_FILE, PARAMETER_FOR_BUILDRMATRIX, PARAMETER_FOR_BUILDCMATRIX, PARAMETER_FOR_NET)
     os.system(cmd)
     if datetime.now().day % 28 == 0:
         copy_and_backup_file(MERGED_EDGE_FILE, '../Analysis') # the backup file will be used for further analysis
 
 
-
 # Compute overlap
-f = open('../Data/temp/AtRegNet.20210208.csv')
-AtRegNet_dict = {}
-for line in f:
-    line = line.strip()
-    lst = line.split(',')
-    if lst[0] != 'TFName' and len(lst) > 4:
-        tf = lst[1].upper().strip()
-        target = lst[4].upper().strip()
-        AtRegNet_dict[tf+target] = 100
-f.close()
-
-f = open(MERGED_EDGE_FILE)
-BrainEdges_dict = {}
-for line in f:
-    line = line.strip()
-    lst = line.split('\t')
-    tf = lst[1].split()[0]
-    target = lst[0].split()[0]
-    score = float(lst[8])
-    BrainEdges_dict[tf+target] = score
-f.close()
-
-overlap = Overlap(BrainEdges_dict, 3, AtRegNet_dict, 0)
-write_log_file('[update_network_by_force.py] Performance stats - TP:%d, PP:%d, Hit rate: %4.7f while comparing with AtRegNet.20210208.csv.' % (overlap.getTP(), overlap.getNumberOfPositivesInPred(), overlap.getTP()/overlap.getNumberOfPositivesInPred()), UPDATE_NETWORK_LOG_FILE)
+gold_standard_file = '../Data/temp/AtRegNet.20210208.csv'
+if os.path.exists(gold_standard_file) and os.path.exists(MERGED_EDGE_FILE):
+    AtRegNet_dict = {}
+    with open(gold_standard_file) as f:
+        for line in f:
+            line = line.strip()
+            lst = line.split(',')
+            if lst[0] != 'TFName' and len(lst) > 4:
+                tf = lst[1].upper().strip()
+                target = lst[4].upper().strip()
+                AtRegNet_dict[tf+target] = 100
+
+    BrainEdges_dict = {}
+    with open(MERGED_EDGE_FILE) as f:
+        for line in f:
+            line = line.strip()
+            lst = line.split('\t')
+            tf = lst[1].split()[0]
+            target = lst[0].split()[0]
+            score = float(lst[8])
+            BrainEdges_dict[tf+target] = score
+
+    overlap = Overlap(BrainEdges_dict, 3, AtRegNet_dict, 0)
+    write_log_file('[update_network_by_force.py] Performance stats - TP:%d, PP:%d, Hit rate: %4.7f while comparing with AtRegNet.20210208.csv.' % (overlap.getTP(), overlap.getNumberOfPositivesInPred(), overlap.getTP()/overlap.getNumberOfPositivesInPred()), UPDATE_NETWORK_LOG_FILE)
 
 write_log_file('[update_network_by_force.py] Update done at %s.\n\n' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S')), UPDATE_NETWORK_LOG_FILE)
-
