forked from mrlan/EnglishPal
				
			Compare commits
	
		
			2 Commits 
		
	
	
		
			Bug585-zha
			...
			Bug476-Yuh
		
	
	| Author | SHA1 | Date | 
|---|---|---|
|  | 4e1e19d71d | |
|  | 3361e4ba79 | 
|  | @@ -6,10 +6,14 @@ | ||||||
| # Purpose: compute difficulty level of an English text | # Purpose: compute difficulty level of an English text | ||||||
| 
 | 
 | ||||||
| import pickle | import pickle | ||||||
| import math | import time | ||||||
| from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order | 
 | ||||||
| import snowballstemmer | import snowballstemmer | ||||||
| 
 | 
 | ||||||
|  | from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order | ||||||
|  | 
 | ||||||
|  | # 定义一个全局的res_d, 记录数据库单词评级之后的单词及其等级 | ||||||
|  | res_d = {} | ||||||
| 
 | 
 | ||||||
| def load_record(pickle_fname): | def load_record(pickle_fname): | ||||||
|     f = open(pickle_fname, 'rb') |     f = open(pickle_fname, 'rb') | ||||||
|  | @@ -24,6 +28,7 @@ def convert_test_type_to_difficulty_level(d): | ||||||
|     :param d: 存储了单词库pickle文件中的单词的字典 |     :param d: 存储了单词库pickle文件中的单词的字典 | ||||||
|     :return: |     :return: | ||||||
|     """ |     """ | ||||||
|  |     time_start = time.time() | ||||||
|     result = {} |     result = {} | ||||||
|     L = list(d.keys())  # in d, we have test types (e.g., CET4,CET6,BBC) for each word |     L = list(d.keys())  # in d, we have test types (e.g., CET4,CET6,BBC) for each word | ||||||
| 
 | 
 | ||||||
|  | @@ -38,7 +43,10 @@ def convert_test_type_to_difficulty_level(d): | ||||||
|             result[k] = 7 |             result[k] = 7 | ||||||
|         elif 'BBC' in d[k]: |         elif 'BBC' in d[k]: | ||||||
|             result[k] = 8 |             result[k] = 8 | ||||||
| 
 |     time_end = time.time() | ||||||
|  |     print('convert_test_type_to_difficulty_level totally cost', time_end - time_start) | ||||||
|  |     global res_d | ||||||
|  |     res_d = result | ||||||
|     return result  # {'apple': 4, ...} |     return result  # {'apple': 4, ...} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @@ -48,8 +56,12 @@ def get_difficulty_level_for_user(d1, d2): | ||||||
|     d1 用户不会的词 |     d1 用户不会的词 | ||||||
|     在d2的后面添加单词,没有新建一个新的字典 |     在d2的后面添加单词,没有新建一个新的字典 | ||||||
|     """ |     """ | ||||||
|  |     time_start = time.time() | ||||||
|     # TODO: convert_test_type_to_difficulty_level() should not be called every time.  Each word's difficulty level should be pre-computed. |     # TODO: convert_test_type_to_difficulty_level() should not be called every time.  Each word's difficulty level should be pre-computed. | ||||||
|  |     if res_d == {}: | ||||||
|         d2 = convert_test_type_to_difficulty_level(d2)  # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...} |         d2 = convert_test_type_to_difficulty_level(d2)  # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...} | ||||||
|  |     else: | ||||||
|  |         d2 = res_d | ||||||
|     stemmer = snowballstemmer.stemmer('english') |     stemmer = snowballstemmer.stemmer('english') | ||||||
| 
 | 
 | ||||||
|     for k in d1:  # 用户的词 |     for k in d1:  # 用户的词 | ||||||
|  | @@ -61,6 +73,8 @@ def get_difficulty_level_for_user(d1, d2): | ||||||
|                 d2[k] = d2[stem]  # 按照词根进行评级 |                 d2[k] = d2[stem]  # 按照词根进行评级 | ||||||
|             else: |             else: | ||||||
|                 d2[k] = 3  # 如果k的词根都不在,那么就当认为是3级 |                 d2[k] = 3  # 如果k的词根都不在,那么就当认为是3级 | ||||||
|  |     time_end = time.time() | ||||||
|  |     print('get_difficulty_level_for_user totally cost', time_end - time_start) | ||||||
|     return d2 |     return d2 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue