summary | refs | log | tree | commit | diff
path: root/src/collision.py
diff options
context:
space:
mode:
author: Lan Hui <1348141770@qq.com> 2021-07-14 15:05:47 +0800
committer: Lan Hui <1348141770@qq.com> 2021-07-14 15:05:47 +0800
commit: 9375d5536c060eaa6c132f7533f8486abfd04074 (patch)
tree: 6d88340bb70f9adec6f5a469686609e5777f8160 /src/collision.py
Upload Jin Xiongrong's work -- https://gitee.com/dragondove/storode; fix UnicodeDecodeError
Diffstat (limited to 'src/collision.py')
-rw-r--r--src/collision.py54
1 files changed, 54 insertions, 0 deletions
diff --git a/src/collision.py b/src/collision.py
new file mode 100644
index 0000000..5d87cf7
--- /dev/null
+++ b/src/collision.py
@@ -0,0 +1,54 @@
+import hashlib
+import random
+
+# collision tests for my identifier solution
+
+
def get_identifier(s):
    """Return a short identifier for the string *s*.

    The identifier is the last four hexadecimal characters of the MD5
    digest of *s* encoded as UTF-8.
    """
    hex_digest = hashlib.md5(s.encode('utf-8')).hexdigest()
    return hex_digest[-4:]
+
+
def pick_random_text(lines, num):
    """Build a random sample text by concatenating randomly chosen lines.

    Args:
        lines: Sequence of strings to draw from.
        num: Number of lines to draw; draws are independent, so the same
            line may be picked more than once.

    Returns:
        The drawn lines concatenated in draw order, or ``''`` when ``num``
        is zero or negative.

    Raises:
        IndexError: If ``lines`` is empty and ``num`` is positive.
    """
    # Draw uniformly from the whole of ``lines`` (the parameter, not a module
    # global): indexing with randint(0, num) only reached the first num + 1
    # entries and overran the list whenever it had num or fewer lines.
    return ''.join(random.choice(lines) for _ in range(num))
+
+
def get_text_lines_from_file(file_path):
    """Read a UTF-8 text file and return its non-blank lines.

    Lines that are empty or contain only whitespace are dropped. The
    remaining lines are returned unchanged, in file order, including any
    trailing newline character.
    """
    with open(file_path, encoding='utf8') as handle:
        return [row for row in handle if row.strip()]
+
+
if __name__ == "__main__":
    # Estimate how often the 4-hex-digit identifier collides: in each trial,
    # hash `total_sample_count` random texts of `sample_size` lines each and
    # count how many identifiers had already been seen in that trial.
    text_lines = get_text_lines_from_file('./sample/Wonderland.txt')
    total_sample_count = 100
    sample_size = 8
    test_times = 100

    collision_counts = []
    for _trial in range(test_times):
        distinct_digests = {
            get_identifier(pick_random_text(text_lines, sample_size))
            for _sample in range(total_sample_count)
        }
        # Every sample either adds a new digest or repeats an earlier one,
        # so the repeats are exactly the samples missing from the set.
        collision_counts.append(total_sample_count - len(distinct_digests))

    avg_collision_count = sum(collision_counts) / test_times
    collision_rate = 100 * (avg_collision_count / total_sample_count)
    print(f'average collision happened count: {avg_collision_count}')
    print(f'collision rate: {collision_rate}%')