1
0
Fork 0

Compare commits

...

6 Commits

Author SHA1 Message Date
汪瑜 b486b6b9db 新增可被分隔的中文符号 2023-05-12 23:22:52 +08:00
汪瑜 0fedf590e8 更新 'app/wordfreqCMD.py' 2023-05-04 17:40:44 +08:00
汪瑜 96dfadcde6 更新 'app/wordfreqCMD.py' 2023-05-04 17:39:48 +08:00
杨昱晨 7b55fc1859 first commit 2023-05-04 17:18:04 +08:00
杨昱晨 ac2046ac2e first commit 2023-05-04 17:02:28 +08:00
whiost baa1c45782 [Bugfix] 部分中文符号不会被分隔 2022-12-08 16:03:38 +08:00
2 changed files with 4 additions and 2 deletions

View File

@ -39,7 +39,8 @@ def file2str(fname):#文件转字符
def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。 def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。
special_characters = '\_©~<=>+-/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉 special_characters = '\_©~<=>+-/[]*&$%^@.,?!:;#()"“”—‘’{}|《》【】、!¥();:?。,' # 把里面的字符都去掉
for c in special_characters: for c in special_characters:
s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况 s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
s = s.replace('--', ' ') s = s.replace('--', ' ')
@ -103,6 +104,7 @@ if __name__ == '__main__':
for x in sort_in_descending_order(L): for x in sort_in_descending_order(L):
print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0])))#函数导出 print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0])))#函数导出
# 把频率的结果放result.html中 # 把频率的结果放result.html中
make_html_page(sort_in_descending_order(L), 'result.html') make_html_page(sort_in_descending_order(L), 'result.html')
@ -117,6 +119,7 @@ if __name__ == '__main__':
# 合并频率 # 合并频率
lst_history = pickle_idea.dict2lst(d) lst_history = pickle_idea.dict2lst(d)
d = pickle_idea.merge_frequency(L, lst_history) d = pickle_idea.merge_frequency(L, lst_history)
pickle_idea.save_frequency_to_pickle(d, 'frequency.p') pickle_idea.save_frequency_to_pickle(d, 'frequency.p')

View File

@ -1,4 +1,3 @@
Flask==1.1.2 Flask==1.1.2
selenium==3.141.0 selenium==3.141.0
PyYAML~=6.0 PyYAML~=6.0
pony==0.7.16