# Code/overlap.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76

# Author: Hui Lan <lanhui@zjnu.edu.cn>
class Overlap:

    ''' Compare two networks and compute hit rate.

        Each network is a dictionary mapping an edge key to a score.  The
        predicted network is compared against the gold-standard network by
        edge key only; scores are used solely for the optional thresholding
        done in the constructor.
    '''

    def __init__(self, predicted_edges=None, tau1=None, gold_standard_edges=None, tau2=None):
        ''' predicted_edges: dictionary {'TF * Target': score}
            tau1: if not None, keep only predicted edges whose score is
                  strictly greater than tau1.
            gold_standard_edges: dictionary {'TF * Target': score}
            tau2: if not None, keep only gold-standard edges whose score is
                  strictly greater than tau2.

            Omitted dictionaries default to a fresh empty dictionary.
        '''
        def get_edges_above_threshold(d, tau):
            # Keep each surviving edge together with its own score.
            return {k: v for k, v in d.items() if v > tau}

        # Use None as the default to avoid sharing one mutable dict
        # between every instance created without arguments.
        self.pred = {} if predicted_edges is None else predicted_edges
        self.true = {} if gold_standard_edges is None else gold_standard_edges
        if tau1 is not None:
            self.pred = get_edges_above_threshold(self.pred, tau1)
        if tau2 is not None:
            self.true = get_edges_above_threshold(self.true, tau2)

    def getTP(self):
        ''' Return the number of predicted edges also present in the gold standard. '''
        return sum(1 for k in self.pred if k in self.true)

    def getFP(self):
        ''' Return the number of predicted edges absent from the gold standard. '''
        return sum(1 for k in self.pred if k not in self.true)

    def getNumberOfPositivesInPred(self):
        ''' Return the number of edges in the (thresholded) predicted network. '''
        return len(self.pred)

    def getNumberOfPositivesInTrue(self):
        ''' Return the number of edges in the (thresholded) gold-standard network. '''
        return len(self.true)


if __name__ == '__main__':
    # Build the gold-standard network from the AtRegNet CSV file.  The header
    # row (first field 'TFName') and rows with fewer than five fields are
    # skipped.  Every retained edge gets the constant score 100.
    AtRegNet_dict = {}
    with open('../Data/temp/AtRegNet.20210208.csv') as f:
        for line in f:
            lst = line.strip().split(',')
            if lst[0] != 'TFName' and len(lst) > 4:
                tf = lst[1].upper().strip()
                target = lst[4].upper().strip()
                AtRegNet_dict[tf+target] = 100

    # Build the predicted network from the tab-separated edge file.  The
    # first whitespace-delimited token of field 1 is used as the TF, that of
    # field 0 as the target, and field 8 as the numeric score.
    # NOTE(review): unlike the gold-standard keys, these keys are not
    # upper-cased -- presumably edges.txt already uses upper-case IDs; confirm.
    BrainEdges_dict = {}
    with open('../Data/temp/edges.txt') as f:
        for line in f:
            lst = line.strip().split('\t')
            tf = lst[1].split()[0]
            target = lst[0].split()[0]
            score = float(lst[8])
            BrainEdges_dict[tf+target] = score

    # Keep predicted edges with score > 3 and gold-standard edges with score > 0.
    overlap = Overlap(BrainEdges_dict, 3, AtRegNet_dict, 0)
    tp = overlap.getTP()
    pp = overlap.getNumberOfPositivesInPred()
    # Avoid ZeroDivisionError when no predicted edge passes the threshold.
    hit_rate = tp / pp if pp else 0.0
    print('TP:%d, PP:%d, Hit rate: %4.7f while comparing with AtRegNet.20210208.csv.' % (tp, pp, hit_rate))