"""Collision tests for the short-identifier scheme.

An identifier is the last four hex characters of the MD5 digest of a piece
of text, so there are only 16**4 = 65536 possible identifiers.  Running this
file as a script estimates how often randomly assembled text samples end up
sharing an identifier.
"""
# Provenance: added as src/collision.py in commit
# 9375d5536c060eaa6c132f7533f8486abfd04074 ("Upload Jin Xiongrong's work --
# https://gitee.com/dragondove/storode; fix UnicodeDecodeError").

import hashlib
import random


def get_identifier(s):
    """Return the last four hex characters of the MD5 digest of ``s``.

    ``s`` is a ``str``; it is encoded as UTF-8 before hashing.
    """
    return hashlib.md5(s.encode('utf-8')).hexdigest()[-4:]


def pick_random_text(lines, num):
    """Return ``num`` randomly chosen entries of ``lines`` concatenated.

    Entries are drawn independently (with replacement) from the *whole*
    list.  ``num == 0`` yields the empty string.

    Raises:
        IndexError: if ``lines`` is empty and ``num`` is greater than zero.
    """
    # The earlier version indexed with random.randint(0, num), which tied the
    # index range to the sample count rather than to len(lines): only the
    # first num + 1 lines could ever be picked, and short lists overflowed.
    # It also read the script-level global ``text_lines``, which does not
    # exist when this function is imported from another module.
    return ''.join(random.choice(lines) for _ in range(num))


def get_text_lines_from_file(file_path):
    """Read ``file_path`` as UTF-8 and return its non-blank lines.

    A line is dropped when it contains only whitespace.  Kept lines are
    returned unchanged, including their trailing newline.
    """
    with open(file_path, encoding='utf8') as f:
        return [line for line in f if line.strip()]


def main():
    """Run the collision experiment and print the averaged results."""
    text_lines = get_text_lines_from_file('./sample/Wonderland.txt')
    total_sample_count = 100  # identifiers generated per trial
    sample_size = 8           # lines concatenated into each sample text
    test_times = 100          # number of independent trials

    collision_counts = []
    for _ in range(test_times):
        collision_count = 0
        digest_set = set()
        for _ in range(total_sample_count):
            digest = get_identifier(pick_random_text(text_lines, sample_size))
            if digest in digest_set:
                # This identifier was already produced earlier in the trial.
                collision_count += 1
            else:
                digest_set.add(digest)
        collision_counts.append(collision_count)

    avg_collision_count = sum(collision_counts) / test_times
    print('average collision happened count: ' + str(avg_collision_count))
    print('collision rate: ' +
          str(100 * (avg_collision_count / total_sample_count)) + '%')


if __name__ == "__main__":
    main()