1
0
Fork 0

Compare commits

...

2 Commits

1 changed file with 18 additions and 4 deletions

View File

@ -6,10 +6,14 @@
# Purpose: compute difficulty level of an English text # Purpose: compute difficulty level of an English text
import pickle import pickle
import math import time
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
import snowballstemmer import snowballstemmer
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
# 定义一个全局的res_d 记录数据库单词评级之后的单词及其等级
res_d = {}
def load_record(pickle_fname): def load_record(pickle_fname):
f = open(pickle_fname, 'rb') f = open(pickle_fname, 'rb')
@ -24,6 +28,7 @@ def convert_test_type_to_difficulty_level(d):
:param d: 存储了单词库pickle文件中的单词的字典 :param d: 存储了单词库pickle文件中的单词的字典
:return: :return:
""" """
time_start = time.time()
result = {} result = {}
L = list(d.keys()) # in d, we have test types (e.g., CET4,CET6,BBC) for each word L = list(d.keys()) # in d, we have test types (e.g., CET4,CET6,BBC) for each word
@ -38,7 +43,10 @@ def convert_test_type_to_difficulty_level(d):
result[k] = 7 result[k] = 7
elif 'BBC' in d[k]: elif 'BBC' in d[k]:
result[k] = 8 result[k] = 8
time_end = time.time()
print('convert_test_type_to_difficulty_level totally cost', time_end - time_start)
global res_d
res_d = result
return result # {'apple': 4, ...} return result # {'apple': 4, ...}
@ -48,8 +56,12 @@ def get_difficulty_level_for_user(d1, d2):
d1 用户不会的词 d1 用户不会的词
在d2的后面添加单词没有新建一个新的字典 在d2的后面添加单词没有新建一个新的字典
""" """
time_start = time.time()
# TODO: convert_test_type_to_difficulty_level() should not be called every time. Each word's difficulty level should be pre-computed. # TODO: convert_test_type_to_difficulty_level() should not be called every time. Each word's difficulty level should be pre-computed.
if res_d == {}:
d2 = convert_test_type_to_difficulty_level(d2) # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...} d2 = convert_test_type_to_difficulty_level(d2) # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...}
else:
d2 = res_d
stemmer = snowballstemmer.stemmer('english') stemmer = snowballstemmer.stemmer('english')
for k in d1: # 用户的词 for k in d1: # 用户的词
@ -61,6 +73,8 @@ def get_difficulty_level_for_user(d1, d2):
d2[k] = d2[stem] # 按照词根进行评级 d2[k] = d2[stem] # 按照词根进行评级
else: else:
d2[k] = 3 # 如果k的词根都不在那么就当认为是3级 d2[k] = 3 # 如果k的词根都不在那么就当认为是3级
time_end = time.time()
print('get_difficulty_level_for_user totally cost', time_end - time_start)
return d2 return d2