# Usage: python3 update_network_by_force.py
# Purpose: update_network.py can take a few days to run.  Run this script to harvest new edges every day.
#
# Revision history:
# Last modified: 24 Nov 2019, hui <lanhui@zjnu.edu.cn>

import os, sys
import glob
import time
from datetime import datetime
from configure import HISTORY_DIR, HISTORY_DIR2, UPDATE_NETWORK_LOG_FILE, MERGED_EDGE_FILE, EDGE_POOL_DIR
from backup_files import copy_and_backup_file

########## Helper functions #######################
def write_log_file(s, fname):
    ''' Append message s to the log file fname, prefixed with the current time. '''
    curr_time = datetime.now().strftime('%Y-%m-%d %H:%M')
    s = '[' + curr_time + ']: ' + s
    if '\n' not in s:
        s += '\n'
    with open(fname, 'a') as f:
        f.write(s)
    print('Log: %s' % (s.strip()))


def num_line(fname):
    ''' Return the number of lines in file fname, or 0 if the file does not exist. '''
    if not os.path.exists(fname):
        return 0
    with open(fname) as f:
        return len(f.readlines())
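# A more memory-friendly variant of num_line (a sketch only; not what this script uses)
# could count lines lazily instead of reading the whole file at once:
#
#     def num_line(fname):
#         if not os.path.exists(fname):
#             return 0
#         with open(fname) as f:
#             return sum(1 for _ in f)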


def lines_with_10_fields(s):
    ''' Return the lines in string s that contain exactly 10 tab-separated fields. '''
    result = []
    for line in s.split('\n'):
        line = line.strip()
        if len(line.split('\t')) == 10:
            result.append(line)
    return result
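# Illustrative example (hypothetical input): only lines that split into exactly
# 10 tab-separated fields are kept.
#
#     lines_with_10_fields('a\tb\tc\td\te\tf\tg\th\ti\tj\ntoo\tfew\tfields')
#     # -> ['a\tb\tc\td\te\tf\tg\th\ti\tj']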


def age_of_file_in_seconds(fname):
    ''' Return the age of fname in seconds, based on its last modification time. '''
    st = os.stat(fname)
    seconds = time.time() - st.st_mtime
    return seconds

def concatenate_edge_files(fname_lst, fname_out):
    ''' Concatenate the edge files in fname_lst into a single file fname_out, keeping only well-formed lines. '''
    with open(fname_out, 'w') as fout:
        for fname in fname_lst:
            with open(fname) as f:
                s = f.read()
            # Make sure each edge has 10 fields before writing.
            lines = lines_with_10_fields(s)
            if lines:
                write_log_file('[update_network_by_force.py] In function concatenate_edge_files. File %s has %d rows with 10 fields.' % (fname, len(lines)), UPDATE_NETWORK_LOG_FILE)
                fout.write('\n'.join(lines) + '\n')
            else:
                write_log_file('[update_network_by_force.py] In function concatenate_edge_files. Check file %s.  It has no rows with 10 fields.' % (fname), UPDATE_NETWORK_LOG_FILE)
        

def delete_edge_files(fname_lst):
    ''' Delete the edge files in fname_lst, skipping files that may still be being written. '''
    age_in_hours = 6
    for fname in fname_lst:
        # Only delete a file that is old enough; a younger file may still be being written.
        if age_of_file_in_seconds(fname) > age_in_hours*60*60: # 6 hours
            os.remove(fname)
        else:
            write_log_file('[update_network_by_force.py] In function delete_edge_files. Check file %s.  It is probably still being written (age less than %d hours), so it is not deleted.' % (fname, age_in_hours), UPDATE_NETWORK_LOG_FILE)


def summarize_edge_file(fname):
    ''' Return a one-line summary of how many edges in fname have association strength above or below the threshold tau. '''
    if not os.path.exists(fname):
        return 'File %s does not exist.' % (fname)
    tau = 2.0
    count_below = 0
    count_above = 0
    count_total = 0
    with open(fname) as f:
        for line in f:
            line = line.strip()
            lst = line.split('\t')
            if len(lst) == 10:
                association_strength = float(lst[8])
                count_total += 1
                if association_strength > tau:
                    count_above += 1
                else:
                    count_below += 1
    if count_total > 0:
        return '#edges above %4.1f: %d (%4.3f percent), #edges below %4.1f: %d (%4.3f percent).' % (tau, count_above, 100.0*count_above/count_total, tau, count_below, 100.0*count_below/count_total)
    else:
        return 'Total number of edges is 0.'
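# Example of the summary string returned above, with hypothetical counts
# (120 edges above tau = 2.0 and 80 below, out of 200 in total):
#
#     '#edges above  2.0: 120 (60.000 percent), #edges below  2.0: 80 (40.000 percent).'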


########## Renew saved G.pickle and SOURCE_NODES.pickle in Webapp #######################
cmd = 'curl http://118.25.96.118/brain/before'
os.system(cmd)
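# os.system returns the command's exit status, which is ignored above.  A minimal
# sketch (an assumption, not part of the current pipeline) of logging a failed
# warm-up request:
#
#     status = os.system(cmd)
#     if status != 0:
#         write_log_file('[update_network_by_force.py] Warm-up request %s failed with status %d.' % (cmd, status), UPDATE_NETWORK_LOG_FILE)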

########## Merge edges #######################
# Update edges.txt, a file merged from two sources, HISTORY_DIR and HISTORY_DIR2.  New edge files may still be being generated, so wait briefly before collecting them.
time.sleep(3)
edge_file_lst = [] # collect edge files.
most_recent_edge_modification_time = 0
write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (HISTORY_DIR), UPDATE_NETWORK_LOG_FILE)
for fname in glob.glob(os.path.join(HISTORY_DIR, 'edges.txt.*')): # many small edges.txt.* are to be merged
    edge_file_lst.append(fname)
    if os.path.getmtime(fname) > most_recent_edge_modification_time:
        most_recent_edge_modification_time = os.path.getmtime(fname)

write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
for fname in glob.glob(os.path.join(HISTORY_DIR2, 'edges.txt.*')): # edges.txt.* are to be merged
    edge_file_lst.append(fname)
    if os.path.getmtime(fname) > most_recent_edge_modification_time:
        most_recent_edge_modification_time = os.path.getmtime(fname)


if edge_file_lst == []:
    write_log_file('[update_network_by_force.py] No edge files to merge in %s and %s.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
elif os.path.getmtime(MERGED_EDGE_FILE) < most_recent_edge_modification_time: # update edges.txt only if there are newer edges to add.
    # concatenate edge files into one
    write_log_file('[update_network_by_force.py] Concatenate edge files in %s and %s into one file.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)    
    curr_time = datetime.now().strftime('%Y%m%d_%H%M')
    concatenate_edge_files(edge_file_lst, os.path.join(EDGE_POOL_DIR, 'edges.txt.many.one.targets.' + curr_time))
    delete_edge_files(edge_file_lst)

if os.path.getmtime(MERGED_EDGE_FILE) < os.path.getmtime(EDGE_POOL_DIR): # edge pool directory has been updated, create new edges.txt
    write_log_file('[update_network_by_force.py] Make a new edges.txt from edge files in %s.' % (EDGE_POOL_DIR), UPDATE_NETWORK_LOG_FILE)
    write_log_file('[update_network_by_force.py] Number of lines in the old edges.txt: %d.' % (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
    write_log_file('[update_network_by_force.py] %s' % (summarize_edge_file(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
    cmd = 'python3 merge_edges.py'
    os.system(cmd)
    write_log_file('[update_network_by_force.py] Number of lines in the new edges.txt: %d.' % (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
    write_log_file('[update_network_by_force.py] %s' % (summarize_edge_file(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)    
    manual_copy_commands = ('Please copy files to the web application: '
                            'sudo cp /home/lanhui/brain/Data/temp/edges.txt /var/www/brain/brain/static/edges/edges.txt ; '
                            'sudo cp /home/lanhui/brain/Data/temp/html_edges/edges.sqlite /var/www/brain/brain/static/edges ; '
                            'curl http://118.25.96.118/brain/before')
    write_log_file('[update_network_by_force.py] %s' % (manual_copy_commands), UPDATE_NETWORK_LOG_FILE)    
    copy_and_backup_file(MERGED_EDGE_FILE, '../Analysis') # the backup file will be used for further analysis


write_log_file('[update_network_by_force.py] Update done at %s.\n\n' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S')), UPDATE_NETWORK_LOG_FILE)