forked from mrlan/EnglishPal
				
			Compare commits
	
		
			2 Commits 
		
	
	
		
			Bug546-Lix
			...
			Bug476-Yuh
		
	
	| Author | SHA1 | Date | 
|---|---|---|
|  | 4e1e19d71d | |
|  | 3361e4ba79 | 
|  | @ -6,10 +6,14 @@ | |||
| # Purpose: compute difficulty level of an English text | ||||
| 
 | ||||
| import pickle | ||||
| import math | ||||
| from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order | ||||
| import time | ||||
| 
 | ||||
| import snowballstemmer | ||||
| 
 | ||||
| from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order | ||||
| 
 | ||||
| # 定义一个全局的res_d, 记录数据库单词评级之后的单词及其等级 | ||||
| res_d = {} | ||||
| 
 | ||||
| def load_record(pickle_fname): | ||||
|     f = open(pickle_fname, 'rb') | ||||
|  | @ -24,6 +28,7 @@ def convert_test_type_to_difficulty_level(d): | |||
|     :param d: 存储了单词库pickle文件中的单词的字典 | ||||
|     :return: | ||||
|     """ | ||||
|     time_start = time.time() | ||||
|     result = {} | ||||
|     L = list(d.keys())  # in d, we have test types (e.g., CET4,CET6,BBC) for each word | ||||
| 
 | ||||
|  | @ -38,7 +43,10 @@ def convert_test_type_to_difficulty_level(d): | |||
|             result[k] = 7 | ||||
|         elif 'BBC' in d[k]: | ||||
|             result[k] = 8 | ||||
| 
 | ||||
|     time_end = time.time() | ||||
|     print('convert_test_type_to_difficulty_level totally cost', time_end - time_start) | ||||
|     global res_d | ||||
|     res_d = result | ||||
|     return result  # {'apple': 4, ...} | ||||
| 
 | ||||
| 
 | ||||
|  | @ -48,8 +56,12 @@ def get_difficulty_level_for_user(d1, d2): | |||
|     d1 用户不会的词 | ||||
|     在d2的后面添加单词,没有新建一个新的字典 | ||||
|     """ | ||||
|     time_start = time.time() | ||||
|     # TODO: convert_test_type_to_difficulty_level() should not be called every time.  Each word's difficulty level should be pre-computed. | ||||
|     if res_d == {}: | ||||
|         d2 = convert_test_type_to_difficulty_level(d2)  # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...} | ||||
|     else: | ||||
|         d2 = res_d | ||||
|     stemmer = snowballstemmer.stemmer('english') | ||||
| 
 | ||||
|     for k in d1:  # 用户的词 | ||||
|  | @ -61,6 +73,8 @@ def get_difficulty_level_for_user(d1, d2): | |||
|                 d2[k] = d2[stem]  # 按照词根进行评级 | ||||
|             else: | ||||
|                 d2[k] = 3  # 如果k的词根都不在,那么就当认为是3级 | ||||
|     time_end = time.time() | ||||
|     print('get_difficulty_level_for_user totally cost', time_end - time_start) | ||||
|     return d2 | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue