diff options
| -rw-r--r-- | app/difficulty.py | 15 | 
1 files changed, 9 insertions, 6 deletions
| diff --git a/app/difficulty.py b/app/difficulty.py index 58740f6..1c8a08f 100644 --- a/app/difficulty.py +++ b/app/difficulty.py @@ -20,29 +20,32 @@ def load_record(pickle_fname):  def difficulty_level_from_frequency(word, d):      level = 0 +    if not word in d: +        return level +          if 'what' in d: -        ratio = (d['what']+1)/(d[word]+1) +        ratio = (d['what']+1)/(d[word]+1) # what is a frequent word          level = math.log( max(ratio, 1), 10) -    level = min(level+1, 4)  +    level = min(level, 4)       return level  def get_difficulty_level(d1, d2):      d = {} -    L = list(d1.keys()) # in d1, we have freuqence for each word +    L = list(d1.keys())  # in d1, we have freuqence for each word      L2 = list(d2.keys()) # in d2, we have test types (e.g., CET4,CET6,BBC) for each word      L.extend(L2) -    L3 = list(set(L)) +    L3 = list(set(L)) # L3 contains all words      for k in L3:          if k in d2:              if 'CET4' in d2[k]: -                d[k] = 1 +                d[k] = 1 # CET4 word has level 1              elif 'CET6' in d2[k]:                  d[k] = 2              elif 'BBC' in d2[k]:                  d[k] = 4 -                if k in d1: # BBC could contain easy words not in CET4 or CET6.  So 4 is not reasonable.  Recompute difficulty level. +                if k in d1: # BBC could contain easy words that are not in CET4 or CET6.  So 4 is not reasonable.  Recompute difficulty level.                      d[k] = min(difficulty_level_from_frequency(k, d1), d[k])          elif k in d1:              d[k] = difficulty_level_from_frequency(k, d1) | 
