summaryrefslogtreecommitdiff
path: root/Code/degree_of_separation.py
blob: b7eba92c709b47d5ff8bef64cf7b57acede008a9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Usage: python degree_of_separation.py edges.txt
# Purpose: get the maximum degree of separation (the diameter of the undirected graph)
import os, sys
import networkx as nx
#import util_networkx
from networkx.algorithms.distance_measures import diameter

def _split_gene_field(field):
    """Split one gene column into ``(gene_id, label, full_name)``.

    The column holds a gene ID followed by whitespace and a name token.
    A name token of '.' means "no name": the label falls back to the gene
    ID and the full name is empty.  Otherwise the label is the part of the
    name token before the first ';' and the full name is the whole token.

    Raises:
        IndexError: if the column has fewer than two whitespace-separated
            tokens.
    """
    tokens = field.split()
    gene_id = tokens[0]
    names = tokens[1]
    if names == '.':
        return gene_id, gene_id, ''
    return gene_id, names.split(';')[0], names


def build_network_from_file(edge_fname):
    """Build a directed graph from a tab-separated edge file.

    Blank lines are skipped.  For every other line the following
    tab-separated columns are read (0-based):

        0  target gene: "<ID> <names>"
        1  source gene: "<ID> <names>"
        2  edge weight (float)
        8  edge strength (float)
        9  method or tissue (stored as the edge attribute ``method``)

    Both genes are added as nodes with ``full_name`` and ``label``
    attributes, and an edge is added from the source to the target.

    Args:
        edge_fname: path of the edge file.

    Returns:
        A tuple ``(G, source_nodes)`` where ``G`` is the ``nx.DiGraph`` and
        ``source_nodes`` is a list of the distinct source gene IDs (in no
        particular order).

    Raises:
        IndexError: if a non-blank line has fewer than ten columns or a gene
            column lacks a name token.
        ValueError: if the weight or strength column is not a number.
    """
    G = nx.DiGraph()
    source_nodes = set()

    # 'with' guarantees the file is closed even when a malformed row raises.
    with open(edge_fname) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            lst = line.split('\t')

            strength = float(lst[8])
            method_or_tissue = lst[9]
            g1, g1_label, g1_name = _split_gene_field(lst[0])  # target gene
            g2, g2_label, g2_name = _split_gene_field(lst[1])  # source gene

            G.add_node(g1, full_name=g1_name, label=g1_label)
            G.add_node(g2, full_name=g2_name, label=g2_label)

            # g2 is source, and g1 is target
            G.add_edge(g2, g1, weight=float(lst[2]), strength=strength, method=method_or_tissue)
            source_nodes.add(g2)

    return G, list(source_nodes)


## main
# Fail with a usage message instead of an IndexError traceback when the
# edge file argument is missing.
if len(sys.argv) < 2:
    sys.exit('Usage: python degree_of_separation.py edges.txt')

print('Load graph...')
G, source_nodes = build_network_from_file(sys.argv[1])
print('Convert to undirected...')
# Edge direction is dropped so that separation is measured along any link
# between two genes, regardless of which one is the source.
G2 = G.to_undirected()
print('Compute diameter...')
# NOTE(review): networkx's diameter() is documented to raise NetworkXError
# when the graph is not connected -- confirm the input is a single component.
d1 = diameter(G2)
print('Graph diameter: %d' % (d1))