forked from mrlan/EnglishPal
Compare commits
2 Commits
Bug546-Lix
...
Bug476-Yuh
Author | SHA1 | Date |
---|---|---|
俞黄焘 | 4e1e19d71d | |
俞黄焘 | 3361e4ba79 |
|
@ -6,10 +6,14 @@
|
|||
# Purpose: compute the difficulty level of an English text
|
||||
|
||||
import pickle
|
||||
import math
|
||||
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
|
||||
import time
|
||||
|
||||
import snowballstemmer
|
||||
|
||||
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
|
||||
|
||||
# 定义一个全局的res_d, 记录数据库单词评级之后的单词及其等级
|
||||
res_d = {}
|
||||
|
||||
def load_record(pickle_fname):
|
||||
f = open(pickle_fname, 'rb')
|
||||
|
@ -24,6 +28,7 @@ def convert_test_type_to_difficulty_level(d):
|
|||
:param d: 存储了单词库pickle文件中的单词的字典
|
||||
:return:
|
||||
"""
|
||||
time_start = time.time()
|
||||
result = {}
|
||||
L = list(d.keys()) # in d, we have test types (e.g., CET4,CET6,BBC) for each word
|
||||
|
||||
|
@ -38,7 +43,10 @@ def convert_test_type_to_difficulty_level(d):
|
|||
result[k] = 7
|
||||
elif 'BBC' in d[k]:
|
||||
result[k] = 8
|
||||
|
||||
time_end = time.time()
|
||||
print('convert_test_type_to_difficulty_level totally cost', time_end - time_start)
|
||||
global res_d
|
||||
res_d = result
|
||||
return result # {'apple': 4, ...}
|
||||
|
||||
|
||||
|
@ -48,8 +56,12 @@ def get_difficulty_level_for_user(d1, d2):
|
|||
d1 用户不会的词
|
||||
在d2的后面添加单词,没有新建一个新的字典
|
||||
"""
|
||||
time_start = time.time()
|
||||
# TODO: convert_test_type_to_difficulty_level() should not be called every time. Each word's difficulty level should be pre-computed.
|
||||
d2 = convert_test_type_to_difficulty_level(d2) # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...}
|
||||
if res_d == {}:
|
||||
d2 = convert_test_type_to_difficulty_level(d2) # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...}
|
||||
else:
|
||||
d2 = res_d
|
||||
stemmer = snowballstemmer.stemmer('english')
|
||||
|
||||
for k in d1: # 用户的词
|
||||
|
@ -61,6 +73,8 @@ def get_difficulty_level_for_user(d1, d2):
|
|||
d2[k] = d2[stem] # 按照词根进行评级
|
||||
else:
|
||||
d2[k] = 3 # 如果k的词根都不在,那么就当认为是3级
|
||||
time_end = time.time()
|
||||
print('get_difficulty_level_for_user totally cost', time_end - time_start)
|
||||
return d2
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue