diff options
author | Hui Lan <lanhui@zjnu.edu.cn> | 2019-11-02 13:21:51 +0800 |
---|---|---|
committer | Hui Lan <lanhui@zjnu.edu.cn> | 2019-11-02 13:21:51 +0800 |
commit | 08037e7d5f9be86c6e209a3e5e25c7429a707e3c (patch) | |
tree | 2dee4453c441c5b386778a7991ddcd9fd867edb6 | |
parent | 7a2d14901d26efdaacec21c8d3b8028c0e4b8b6f (diff) |
app/difficulty.py: adjust difficulty level to reflect test level
If a word appears in the CET4 test, it has a difficulty level of 4.
If a word appears in the CET6 test, it has a difficulty level of 6.
If a word does not appear in the above two tests, then its difficulty level is determined by its frequency.
If this word is not in the frequency table, then its level is 1.
If this word is in the frequency table, then its level is determined as follows.
level = log2((F+1) / (f+1)), where F is the largest frequency, and f is the word's frequency.
If f=F, then log2(1) = 0, i.e., its difficulty level is 0.
If f<F, then (F+1)/(f+1) > 1, so log2((F+1)/(f+1)) is a positive value.
-rw-r--r-- | app/difficulty.py | 12 |
1 files changed, 6 insertions, 6 deletions
diff --git a/app/difficulty.py b/app/difficulty.py index 1c8a08f..dbf6cf0 100644 --- a/app/difficulty.py +++ b/app/difficulty.py @@ -19,15 +19,15 @@ def load_record(pickle_fname): def difficulty_level_from_frequency(word, d): - level = 0 + level = 1 if not word in d: return level if 'what' in d: ratio = (d['what']+1)/(d[word]+1) # what is a frequent word - level = math.log( max(ratio, 1), 10) + level = math.log( max(ratio, 1), 2) - level = min(level, 4) + level = min(level, 8) return level @@ -40,11 +40,11 @@ def get_difficulty_level(d1, d2): for k in L3: if k in d2: if 'CET4' in d2[k]: - d[k] = 1 # CET4 word has level 1 + d[k] = 4 # CET4 word has level 4 elif 'CET6' in d2[k]: - d[k] = 2 + d[k] = 6 elif 'BBC' in d2[k]: - d[k] = 4 + d[k] = 8 if k in d1: # BBC could contain easy words that are not in CET4 or CET6. So 4 is not reasonable. Recompute difficulty level. d[k] = min(difficulty_level_from_frequency(k, d1), d[k]) elif k in d1: |