1、添加误删的IELTS单词标签

2、简化评级流程(快很多,几乎与网站相同)
3、删除了原先用于提取词根的函数
Bug476-ZhangWeiHao-YuHuangtao
俞黄焘 2023-05-18 20:36:31 +08:00
parent c4378e73cd
commit a83f91a88d
1 changed files with 5 additions and 23 deletions

View File

@ -24,7 +24,7 @@ def convert_test_type_to_difficulty_level(d):
:return: :return:
""" """
result = {} result = {}
L = list(d.keys()) # in dic, we have test types (e.g., CET4,CET6,BBC) for each word L = list(d.keys()) # in d, we have test types (e.g., CET4,CET6,BBC) for each word
for k in L: for k in L:
if 'CET4' in d[k]: if 'CET4' in d[k]:
@ -33,30 +33,13 @@ def convert_test_type_to_difficulty_level(d):
result[k] = 5 result[k] = 5
elif 'CET6' in d[k] or 'GRADUATE' in d[k]: elif 'CET6' in d[k] or 'GRADUATE' in d[k]:
result[k] = 6 result[k] = 6
elif 'OXFORD5000' in d[k]: elif 'OXFORD5000' in d[k] or 'IELTS' in d[k]:
result[k] = 7 result[k] = 7
elif 'BBC' in d[k]: elif 'BBC' in d[k]:
result[k] = 8 result[k] = 8
return result # {'apple': 4, ...} return result # {'apple': 4, ...}
def simplify_the_words_dict(d):
    """
    Collapse a rated word dictionary onto word stems.

    Each key of ``d`` is reduced to its stem with the English Snowball
    stemmer. The returned dictionary maps every stem to the lowest value
    found among the keys of ``d`` that share that stem.

    :param d: mapping of word -> difficulty level
    :return: new dict of stem -> minimum difficulty level
    """
    stemmer = snowballstemmer.stemmer('english')
    lowest_by_stem = {}
    for word in d:
        level = d[word]
        root = stemmer.stemWord(word)
        # Keep the stored level unless the current word is strictly easier.
        if root in lowest_by_stem and not lowest_by_stem[root] > level:
            continue
        lowest_by_stem[root] = level
    return lowest_by_stem
def get_difficulty_level_for_user(d1, d2): def get_difficulty_level_for_user(d1, d2):
""" """
@ -65,14 +48,13 @@ def get_difficulty_level_for_user(d1, d2):
在d2的后面添加单词没有新建一个新的字典 在d2的后面添加单词没有新建一个新的字典
""" """
d2 = convert_test_type_to_difficulty_level(d2) # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...} d2 = convert_test_type_to_difficulty_level(d2) # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...}
d2_simplified = simplify_the_words_dict(d2) # 提取d2的词根 {'appl': 4, 'abandon': 4, ...}
stem = snowballstemmer.stemmer('english') stem = snowballstemmer.stemmer('english')
for k in d1: # 用户的词 for k in d1: # 用户的词
if k in d2: # 如果用户的词以原型的形式存在于词库d2中 if k in d2: # 如果用户的词以原型的形式存在于词库d2中
continue # 无需评级,跳过 continue # 无需评级,跳过
elif stem.stemWord(k) in d2_simplified: # 如果用户的词的词根存在于词库d2的词根库中 elif stem.stemWord(k) in d2: # 如果用户的词的词根存在于词库d2的词根库中
d2[k] = d2_simplified[k] # 按照词根进行评级 d2[k] = d2[stem.stemWord(k)] # 按照词根进行评级
break break
else: else:
d2[k] = 3 # 如果k的词根都不在那么就当认为是3级 d2[k] = 3 # 如果k的词根都不在那么就当认为是3级
@ -108,7 +90,7 @@ def user_difficulty_level(d_user, d):
for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level
lst = d_user2[date] # a list of words lst = d_user2[date] # a list of words
lst2 = [] # a list of tuples, (word, difficulty level) lst2 = [] # a list of tuples, (word, difficulty level)
for word in lst: for word in lst:
if word in d: if word in d:
lst2.append((word, d[word])) lst2.append((word, d[word]))