Publish EnglishPal source code

2021-04-06 16:22:03 +08:00 · 2021-04-06 16:22:03 +08:00 · 843ed03d4f
commit 843ed03d4f
20 changed files with 1608 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,12 @@
+venv/
+app/__init__.py
+app/__pycache__/
+app/sqlite_commands.py
+app/static/usr/*.jpg
+app/static/img/
+app/static/frequency/frequency_*.pickle
+app/static/frequency/frequency.p
+app/static/wordfreqapp.db
+app/static/wordfreqapp.sql
+app/static/donate-the-author.jpg
+app/static/donate-the-author-hidden.jpg
--- a/app/README.md
+++ b/app/README.md
@ -0,0 +1,90 @@
+How to run EnglishPal
+===========================
+
+
+
+Hui Lan <hui.lan@cantab.net>
+1 November 2019
+
+
+
+
+
+Run it on a local machine
+-------------------------
+
+python3 main.py
+
+
+
+
+
+
+Run it within Docker
+--------------------
+
+Assuming that docker has been installed...
+
+
+ssh to ubuntu@118.25.96.118
+cd to /home/lanhui/englishpal
+
+# Stop the running service (by restarting the Docker daemon)
+sudo service docker restart
+
+# Rebuild container. Run this after modifying the source code.
+sudo docker build -t englishpal .
+
+# Run the application
+sudo docker run -d -p 90:80 -v /home/lanhui/englishpal/app/static/frequency:/app/static/frequency -t englishpal  # for permanently saving data
+sudo docker run -d -p 90:80 -t englishpal # data will be lost after exiting
+
+# Save space.  Run it after sudo docker run
+sudo docker system prune -a -f
+
+
+# Other commands
+sudo docker ps -a
+
+sudo docker logs container_name, where the container name can be obtained from sudo docker ps.
+
+build.sh contains all the above commands.  Run "sudo ./build.sh" to rebuild the web application.
+
+
+
+Update articles
+---------------
+
+pscp wordfreqapp.db lanhui@118.25.96.118:/home/lanhui/englishpal/app/static
+
+
+
+Feedback
+---------
+
+Tianhua people
+~~~~~~~~~~~~~~~~
+
+Need a smart phone app.  I use phone a lot.
+
+Can take a picture for text.  Automatic translation.
+
+You cannot ask students to use computers.
+
+
+Usability testing
+~~~~~~~~~~~~~~~~~~~~~~
+
+Respondent 1 --- Paid 10 yuan
+
+“成为会员”改成“注册”
+
+“登出”改成“退出”
+
+“收集生词吧”改成“生词收集栏”
+
+***不要自动显示下一篇
+
+需要有“上一篇”、“下一篇”
+
+Internal server error when registering with an email address.
--- a/app/UseSqlite.py
+++ b/app/UseSqlite.py
@ -0,0 +1,73 @@
+###########################################################################
+# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
+# Written permission must be obtained from the author for commercial uses.
+###########################################################################
+
+
+# Reference: Dusty Phillips.  Python 3 Object-oriented Programming Second Edition. Pages 326-328.
+# Copyright (C) 2019 Hui Lan
+
+import sqlite3
+
+class Sqlite3Template:
+    def __init__(self, db_fname):
+        self.db_fname = db_fname
+        
+    def connect(self, db_fname):
+        self.conn = sqlite3.connect(self.db_fname)
+    
+    def instructions(self, query_statement):
+        raise NotImplementedError()
+    
+    def operate(self):
+        self.conn.row_factory = sqlite3.Row
+        self.results = self.conn.execute(self.query) # self.query is to be given in the child classes
+        self.conn.commit()
+        
+    def format_results(self):
+        raise NotImplementedError()  
+    
+    def do(self):
+        self.connect(self.db_fname)
+        self.instructions(self.query)
+        self.operate()
+
+        
+class InsertQuery(Sqlite3Template):
+    def instructions(self, query):
+        self.query = query
+        
+
+class RecordQuery(Sqlite3Template):
+    def instructions(self, query):
+        self.query = query
+
+    def format_results(self):
+        output = []
+        for row_dict in self.results.fetchall():
+            lst = []
+            for k in dict(row_dict):
+                lst.append( row_dict[k] )
+            output.append(', '.join(lst))
+        return '\n\n'.join(output)
+    
+    def get_results(self):
+        result = []
+        for row_dict in self.results.fetchall():
+            result.append( dict(row_dict) )
+        return result
+    
+
+
+if __name__ == '__main__':
+    
+    #iq = InsertQuery('RiskDB.db')
+    #iq.instructions("INSERT INTO inspection Values ('FoodSupplies', 'RI2019051301', '2019-05-13', '{}')")
+    #iq.do()
+    #iq.instructions("INSERT INTO inspection Values ('CarSupplies', 'RI2019051302', '2019-05-13', '{[{\"risk_name\":\"elevator\"}]}')")
+    #iq.do()
+    
+    rq = RecordQuery('wordfreqapp.db')
+    rq.instructions("SELECT * FROM article WHERE level=3")
+    rq.do()
+    #print(rq.format_results())
--- a/app/WordFreq.py
+++ b/app/WordFreq.py
@ -0,0 +1,25 @@
+###########################################################################
+# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
+# Written permission must be obtained from the author for commercial uses.
+###########################################################################
+
+from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
+import string
+
+class WordFreq:
+    def __init__(self, s):
+        self.s = remove_punctuation(s)
+
+    def get_freq(self):
+        lst = []
+        for t in freq(self.s):
+            word = t[0]
+            if len(word) > 0 and word[0] in string.ascii_letters:
+                lst.append(t)
+        return sort_in_descending_order(lst)
+    
+
+if __name__ == '__main__':
+    f = WordFreq('BANANA; Banana, apple ORANGE Banana banana.')
+    print(f.get_freq())
+
--- a/app/build.sh
+++ b/app/build.sh
@ -0,0 +1,17 @@
+#!/bin/sh
+
+cd /home/lanhui/englishpal
+
+# Stop service
+sudo service docker restart
+
+# Rebuild container. Run this after modifying the source code.
+sudo docker build -t englishpal .
+
+# Run the application
+sudo docker run -d -p 90:80 -v /home/lanhui/englishpal/app/static/frequency:/app/static/frequency -t englishpal  # for permanently saving data
+
+# Save space.  Run it after sudo docker run
+sudo docker system prune -a -f
+
+
--- a/app/difficulty.py
+++ b/app/difficulty.py
@ -0,0 +1,243 @@
+###########################################################################
+# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
+# Written permission must be obtained from the author for commercial uses.
+###########################################################################
+
+# Purpose: compute difficulty level of an English text
+
+import pickle
+import math
+from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
+
+
+def load_record(pickle_fname):
+    f = open(pickle_fname, 'rb')
+    d = pickle.load(f)
+    f.close()
+    return d
+
+
+def difficulty_level_from_frequency(word, d):
+    level = 1
+    if not word in d:
+        return level
+    
+    if 'what' in d:
+        ratio = (d['what']+1)/(d[word]+1) # what is a frequent word
+        level = math.log( max(ratio, 1), 2)
+
+    level = min(level, 8) 
+    return level
+
+
+def get_difficulty_level(d1, d2):
+    d = {}
+    L = list(d1.keys())  # in d1, we have freuqence for each word
+    L2 = list(d2.keys()) # in d2, we have test types (e.g., CET4,CET6,BBC) for each word
+    L.extend(L2)
+    L3 = list(set(L)) # L3 contains all words
+    for k in L3:
+        if k in d2:
+            if 'CET4' in d2[k]:
+                d[k] = 4 # CET4 word has level 4
+            elif 'CET6' in d2[k]:
+                d[k] = 6
+            elif 'BBC' in d2[k]:
+                d[k] = 8
+                if k in d1: # BBC could contain easy words that are not in CET4 or CET6.  So 4 is not reasonable.  Recompute difficulty level.
+                    d[k] = min(difficulty_level_from_frequency(k, d1), d[k])
+        elif k in d1:
+            d[k] = difficulty_level_from_frequency(k, d1)
+
+    return d
+
+        
+
+def revert_dict(d):
+    '''
+    In d2, time is the key, and the value is a list of words picked at that time.
+    '''
+    d2 = {}
+    for k in d:
+        lst = d[k]
+        for time_info in lst:
+            date = time_info[:10] # until hour
+            if not date in d2:
+                d2[date] = [k]
+            else:
+                d2[date].append(k)
+    return d2
+
+
+def user_difficulty_level(d_user, d):
+    d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
+    count = 0
+    geometric = 1
+    for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level
+        lst = d_user2[date] # a list of words
+        lst2 = [] # a list of tuples, (word, difficulty level)
+        for  word in lst:
+            if word in d:
+                lst2.append((word, d[word]))
+
+        lst3 = sort_in_ascending_order(lst2) # easiest tuple first
+        #print(lst3)
+        for t in lst3:
+            word = t[0]
+            hard = t[1]
+            #print('WORD %s HARD %4.2f' % (word, hard))
+            geometric = geometric * (hard)
+            count += 1
+            if count >= 10:
+                return geometric**(1/count)
+
+    return geometric**(1/max(count,1))
+
+
+def text_difficulty_level(s, d):
+    s = remove_punctuation(s)
+    L = freq(s)
+
+    lst = [] # a list of tuples, each tuple being (word, difficulty level)
+    for x in L:
+        word = x[0]
+        if word in d:
+            lst.append((word, d[word]))
+
+    lst2 = sort_in_descending_order(lst) # most difficult words on top
+    #print(lst2)
+    count = 0
+    geometric = 1
+    for t in lst2:
+        word = t[0]
+        hard = t[1]
+        geometric = geometric * (hard)
+        count += 1
+        if count >= 20: # we look for n most difficult words
+            return geometric**(1/count)
+        
+    return geometric**(1/max(count,1))
+
+
+
+if __name__ == '__main__':
+
+
+    d1 = load_record('frequency.p')
+    #print(d1)
+
+    d2 = load_record('words_and_tests.p')
+    #print(d2)
+
+
+    d3 = get_difficulty_level(d1, d2)
+
+    s = '''
+South Lawn
+11:53 A.M. EDT
+THE PRESIDENT:  Hi, everybody.  Hi.  How are you?  So, the stock market is doing very well.
+The economy is booming.  We have a new record in sight.  It could happen even today.
+But we have a new stock market record.  I think it’ll be about 118 times that we’ve broken the record.
+Jobs look phenomenal.
+    '''
+    s = '''
+By the authority vested in me as President by the Constitution and the laws of the United States, after carefully considering the reports submitted to the Congress by the Energy Information Administration, including the report submitted in October 2019, and other relevant factors, including global economic conditions, increased oil production by certain countries, the global level of spare petroleum production capacity, and the availability of strategic reserves, I determine, pursuant to section 1245(d)(4)(B) and (C) of the National Defense Authorization Act for Fiscal Year 2012, Public Law 112-81, and consistent with prior determinations, that there is a sufficient supply of petroleum and petroleum products from countries other than Iran to permit a significant reduction in the volume of petroleum and petroleum products purchased from Iran by or through foreign financial institutions.
+
+'''
+
+    s = '''
+Democrats keep their witnesses locked behind secure doors, then flood the press with carefully sculpted leaks and accusations, driving the Trump-corruption narrative. And so the party goes, galloping toward an impeachment vote that would overturn the will of the American voters—on a case built in secret.
+
+Conservative commentators keep noting that Mrs. Pelosi’s refusal to hold a vote on the House floor to authorize an official impeachment inquiry helps her caucus’s vulnerable members evade accountability. But there’s a more practical and uglier reason for Democrats to skip the formalities. Normally an authorization vote would be followed by official rules on how the inquiry would proceed. Under today’s process, Mr. Schiff gets to make up the rules as he goes along. Behold the Lord High Impeacher.
+
+Democrats view control over the narrative as essential, having learned from their Russia-collusion escapade the perils of transparency. They banked on special counsel Robert Mueller’s investigation proving impeachment fodder, but got truth-bombed. Their subsequent open hearings on the subject—featuring Michael Cohen, Mr. Mueller and Corey Lewandowski —were, for the Democrats, embarrassing spectacles, at which Republicans punched gaping holes in their story line.
+
+Mr. Schiff is making sure that doesn’t happen again; he’ll present the story, on his terms. His rules mean he can issue that controlling decree about “only one” transcript and Democratic staff supervision of Republican members. It means he can bar the public, the press and even fellow representatives from hearings, even though they’re unclassified.
+'''
+
+    s = '''
+Unemployment today is at a 50-year low.  There are more Americans working today than ever before.  Median household income in the last two and half years has risen by more than $5,000.  And that doesn’t even account for the savings from the President’s tax cuts or energy reforms for working families.
+
+Because of the President’s policies, America has added trillions of dollars of wealth to our economy while China’s economy continues to fall behind.
+
+To level the playing field for the American worker against unethical trade practices, President Trump levied tariffs on $250 billion in Chinese goods in 2018.  And earlier this year, the President announced we would place tariffs on another $300 billion of Chinese goods if significant issues in our trading relationship were not resolved by December of this year.
+'''
+    s = '''
+Needless to say, we see it very differently.  Despite the great power competition that is underway, and America’s growing strength, we want better for China.  That’s why, for the first time in decades, under President Donald Trump’s leadership, the United States is treating China’s leaders exactly how the leaders of any great world power should be treated — with respect, yes, but also with consistency and candor.
+'''
+    s = '''
+Brexit is the scheduled withdrawal of the United Kingdom from the European Union. Following a June 2016 referendum, in which 51.9% voted to leave, the UK government formally announced the country's withdrawal in March 2017, starting a two-year process that was due to conclude with the UK withdrawing on 29 March 2019. As the UK parliament thrice voted against the negotiated withdrawal agreement, that deadline has been extended twice, and is currently 31 October 2019. The Benn Act, passed in September 2019, requires the government to seek a third extension.
+'''
+
+    s = '''
+The argument for Brexit
+According to the BBC, the push to leave the EU was advocated mostly by the UK Independence Party and was not supported by the Prime Minister, David Cameron. Members of the UK Independence Party argued that Britain’s participation in the EU was a restrictive element for the country.
+
+As one of the EU’s primary initiatives is free movement within the region the party’s main arguments centered around regaining border control and reclaiming business rights. In addition, supporters of Brexit cited the high EU membership fees as a negative aspect of participation in the EU. It was argued that if the UK separates itself from the EU, these fees can be used to benefit the UK.
+
+The argument against Brexit
+The Conservative Party and the Prime Minister were strongly in favor of remaining with the EU. As a result of the decision to discontinue its participation in the EU, the Prime Minister has made a public statement that he will be relinquishing his position. He believes that the country needs a leader with the same goals as the majority of the country. He has promised a new PM will be in place by early September.
+
+The argument against Brexit pertains mostly to the business benefits. The argument is that the UK receives business benefits by being able to participate in the single market system established by the EU. In response to the criticism against the open borders, proponents believe that the influx of immigrants helps develop an eager workforce and fuels public service projects.
+
+Leaders in favor of staying also worry about the political backlash that could possibly result from other countries who favored staying with the EU. In addition, proponents of remaining with the EU believe that being part of a wider community of nations provides economic and cultural strength, as well as an additional element of security.
+
+What does Brexit mean for the future?
+While the decision marked a huge statement for the UK, the referendum vote is not legally binding. There are still many hurdles that must be dealt with before Brexit can become a reality.
+
+The UK is still subject to the laws of the EU until Britain’s exit becomes legal. In order for the UK to make its break official, the country needs to invoke Article 50. It is unclear exactly what this process will entail or how long it will take as Britain is the first country to take its leave of the EU. Once Article 50 has been formally invoked, the UK has two years to negotiate its departure with the other member states. But according to the BBC, “Extricating the UK from the EU will be extremely complex, and the process could drag on longer than that.”
+
+Amidst the aftermath of this shocking referendum vote, there is great uncertainty as political leaders decide what this means for the UK.
+
+'''
+
+
+    s = '''
+British Prime Minister Boris Johnson walks towards a voting station during the Brexit referendum in Britain, June 23, 2016. (Photo: EPA-EFE)
+
+LONDON – British Prime Minister Boris Johnson said Thursday he will likely ask Parliament to approve an election as part of an effort to break a Brexit deadlock.
+
+It is not clear if the vote, which Johnson wants to hold on Dec. 12, will take place as opposition lawmakers must also back the move.
+
+They are expected to vote on the measure on Monday. 
+
+Johnson's announcement comes ahead of an expected decision Friday from the European Union over whether to delay Britain's exit from the bloc for three months. 
+
+Britain's leader has been steadfastly opposed to any extension to the nation's scheduled Oct. 31 departure date from the EU, although in a letter to the leader of the opposition Labour Party this week he said he would accept a short technical postponement, "say to 15 or 30 November," to allow lawmakers to implement an EU withdrawal bill. 
+
+Johnson's decision to offer to call an election follows lawmakers' rejection of his plan to rush through an EU exit bill that runs to hundreds of pages in just three days. They want more time to scrutinize the legislation and to make sure it does not leave the door open to a possible "no-deal" Brexit during future exit negotiations with the EU that will run through next year. A "no-deal" Brexit could dramatically harm Britain's economy. 
+
+The prime minister was forced to ask for an extension to Britain's EU departure date after Britain's Parliament passed a law to ward off the threat of a "no-deal" Brexit.
+
+Johnson has repeatedly pledged to finalize the first stage, a transition deal, of Britain's EU divorce battle by Oct. 31. A second stage will involve negotiating its future relationship with the EU on trade, security and other salient issues.
+'''
+
+
+    s = '''
+Thank you very much. We have a Cabinet meeting. We’ll have a few questions after grace. And, if you would, Ben, please do the honors.
+
+THE PRESIDENT: All right, thank you, Ben. That was a great job. Appreciate it.
+
+The economy is doing fantastically well. It’s getting very close to another record. We’ve had many records since we won office. We’re getting very close to another record. I don’t know if anybody saw it: The household median income for eight years of President Bush, it rose $400. For eight years of President Obama, it rose $975. And for two and half years of President Trump — they have it down as two and a half years — it rose $5,000, not including $2,000 for taxes. So it rose, let’s say, $7,000. So in two and a half years, we’re up $7,000, compared to $1,000, compared to $400. And that’s for eight years and eight years.
+
+That’s a number that just came out, but that’s a number that I don’t know how there could be any dispute or any — I’ve never heard a number like that, meaning the economy is doing fantastically well.
+
+We need — for our farmers, our manufacturers, for, frankly, unions and non-unions, we need USMCA to be voted on. If it’s voted on, it’ll pass. It’s up to Nancy Pelosi to put it up. If she puts it up, it’s going to pass. It’s going to be very bipartisan. It’s something that’s very much needed. It’ll be hundreds of thousands of jobs.
+
+
+'''
+
+
+
+
+    #f = open('bbc-fulltext/bbc/entertainment/001.txt')
+    f = open('wordlist.txt')
+    s = f.read()
+    f.close()
+
+
+
+    
+    print(text_difficulty_level(s, d3))
+
+            
--- a/app/main.py
+++ b/app/main.py
@ -0,0 +1,421 @@
+#! /usr/bin/python3
+# -*- coding: utf-8 -*-
+
+###########################################################################
+# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
+# Written permission must be obtained from the author for commercial uses.
+###########################################################################
+
+from WordFreq import WordFreq
+from wordfreqCMD import youdao_link, sort_in_descending_order
+from UseSqlite import InsertQuery, RecordQuery
+import pickle_idea, pickle_idea2
+import os
+import random, glob
+from datetime import datetime
+from flask import Flask, request, redirect, render_template, url_for, session, abort, flash
+from difficulty import get_difficulty_level, text_difficulty_level, user_difficulty_level
+
+app = Flask(__name__)
+app.secret_key = 'lunch.time!'
+
+path_prefix = '/var/www/wordfreq/wordfreq/'
+path_prefix = './' # comment this line in deployment
+
+def get_random_image(path):
+    img_path = random.choice(glob.glob(os.path.join(path, '*.jpg')))
+    return img_path[img_path.rfind('/static'):]
+
+def get_random_ads():
+    ads = random.choice(['个性化分析精准提升', '你的专有单词本', '智能捕捉阅读弱点，针对性提高你的阅读水平'])
+    return ads + '。 <a href="/signup">试试</a>吧！'
+
+def load_freq_history(path):
+    d = {}
+    if os.path.exists(path):
+        d = pickle_idea.load_record(path)
+    return d
+
+def verify_user(username, password):
+    rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
+    rq.instructions("SELECT * FROM user WHERE name='%s' AND password='%s'" % (username, password))
+    rq.do()
+    result = rq.get_results()
+    return result != []
+
+def add_user(username, password):
+    start_date = datetime.now().strftime('%Y%m%d')
+    expiry_date = '20211230'
+    rq = InsertQuery(path_prefix + 'static/wordfreqapp.db')
+    rq.instructions("INSERT INTO user Values ('%s', '%s', '%s', '%s')" % (username, password, start_date, expiry_date))
+    rq.do()
+
+    
+def check_username_availability(username):
+    rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
+    rq.instructions("SELECT * FROM user WHERE name='%s'" % (username))
+    rq.do()
+    result = rq.get_results()
+    return  result == []
+
+def get_expiry_date(username):
+    rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
+    rq.instructions("SELECT expiry_date FROM user WHERE name='%s'" % (username))
+    rq.do()
+    result = rq.get_results()
+    if len(result) > 0:
+        return  result[0]['expiry_date']
+    else:
+        return '20191024'
+    
+
+
+def within_range(x, y, r):
+    return x > y and abs(x - y) <= r 
+
+
+def get_today_article(user_word_list, articleID):
+
+    rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
+    if articleID == None:    
+        rq.instructions("SELECT * FROM article")
+    else:
+        rq.instructions('SELECT * FROM article WHERE article_id=%d' % (articleID))
+    rq.do()
+    result = rq.get_results()
+    
+    # Choose article according to reader's level
+    d1 = load_freq_history(path_prefix + 'static/frequency/frequency.p')
+    d2 = load_freq_history(path_prefix + 'static/words_and_tests.p')
+    d3 = get_difficulty_level(d1, d2)
+
+    d = {}
+    d_user = load_freq_history(user_word_list)
+    user_level = user_difficulty_level(d_user, d3) # more consideration as user's behaviour is dynamic. Time factor should be considered.
+    random.shuffle(result) # shuffle list
+    d = random.choice(result)
+    text_level = text_difficulty_level(d['text'], d3)
+    if articleID == None:
+        for reading in result:
+            text_level = text_difficulty_level(reading['text'], d3)
+            #print('TEXT_LEVEL %4.2f' % (text_level))
+            if within_range(text_level, user_level, 0.5):
+                d = reading
+                break
+            
+    s = '<p><i>According to your word list, your level is <b>%4.2f</b> and we have chosen an article with a difficulty level of <b>%4.2f</b> for you.</i></p>' % (user_level, text_level)
+    s += '<p><b>%s</b></p>' % (d['date'])
+    s += '<p><font size=+2>%s</font></p>' % (d['text'])
+    s += '<p><i>%s</i></p>' % (d['source'])
+    s += '<p><b>%s</b></p>' % (get_question_part(d['question']))
+    s = s.replace('\n', '<br/>')    
+    s += '%s' % (get_answer_part(d['question']))
+    session['articleID'] = d['article_id']
+    return s
+
+
+def appears_in_test(word, d):
+    if not word in d:
+        return ''
+    else:
+        return ','.join(d[word])
+
+
+def get_time():
+    return datetime.now().strftime('%Y%m%d%H%M') # upper to minutes
+
+
+def get_question_part(s):
+    s = s.strip()
+    result = []
+    flag = 0
+    for line in s.split('\n'):
+        line = line.strip()
+        if line == 'QUESTION':
+            result.append(line)
+            flag = 1
+        elif line == 'ANSWER':
+            flag = 0
+        elif flag == 1:
+            result.append(line)
+    return '\n'.join(result)
+
+
+def get_answer_part(s):
+    s = s.strip()
+    result = []
+    flag = 0
+    for line in s.split('\n'):
+        line = line.strip()
+        if line == 'ANSWER':
+            flag = 1
+        elif flag == 1:
+            result.append(line)
+    # https://css-tricks.com/snippets/javascript/showhide-element/
+    js = '''
+<script type="text/javascript">
+
+    function toggle_visibility(id) {
+       var e = document.getElementById(id);
+       if(e.style.display == 'block')
+          e.style.display = 'none';
+       else
+          e.style.display = 'block';
+    }
+</script>   
+    '''
+    html_code = js
+    html_code += '\n'
+    html_code += '<button onclick="toggle_visibility(\'answer\');">ANSWER</button>\n'
+    html_code += '<div id="answer" style="display:none;">%s</div>\n' % ('\n'.join(result))
+    return html_code
+
+
+
+@app.route("/<username>/reset", methods=['GET', 'POST'])
+def user_reset(username):
+    if request.method == 'GET':
+        session['articleID'] = None
+        return redirect(url_for('userpage', username=username))
+    else:
+        return 'Under construction'
+
+
+@app.route("/mark", methods=['GET', 'POST'])
+def mark_word():
+    if request.method == 'POST':
+        d = load_freq_history(path_prefix + 'static/frequency/frequency.p')
+        lst_history = pickle_idea.dict2lst(d)
+        lst = []
+        for word in request.form.getlist('marked'):
+            lst.append((word, 1))
+        d = pickle_idea.merge_frequency(lst, lst_history)
+        pickle_idea.save_frequency_to_pickle(d, path_prefix + 'static/frequency/frequency.p')
+        return redirect(url_for('mainpage'))
+    else:
+        return 'Under construction'
+
+
+
+@app.route("/", methods=['GET', 'POST'])
+def mainpage():
+    if request.method == 'POST':  # when we submit a form
+        content = request.form['content']
+        f = WordFreq(content)
+        lst = f.get_freq()
+        page = '<form method="post" action="/mark">\n'
+        count = 1
+        for x in lst:
+            page += '<p><font color="grey">%d</font>: <a href="%s">%s</a> (%d)  <input type="checkbox" name="marked" value="%s"></p>\n' % (count, youdao_link(x[0]), x[0], x[1], x[0])
+            count += 1
+        page += ' <input type="submit" value="确定并返回"/>\n'
+        page += '</form>\n'
+        # save history 
+        d = load_freq_history(path_prefix + 'static/frequency/frequency.p')
+        lst_history = pickle_idea.dict2lst(d)
+        d = pickle_idea.merge_frequency(lst, lst_history)
+        pickle_idea.save_frequency_to_pickle(d, path_prefix + 'static/frequency/frequency.p')
+        
+        return page
+    elif request.method == 'GET': # when we load a html page
+        page = '''
+             <html lang="zh">
+               <head>
+               <meta charset="utf-8">
+               <meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=0.5, maximum-scale=3.0, user-scalable=yes" />
+                 <title>EnglishPal 英文单词高效记</title>
+
+               </head>
+               <body>
+        '''
+        page += '<p><b><font size="+3" color="red">English Pal - Learn English in a smart way!</font></b></p>'
+        if session.get('logged_in'):
+            page += ' <a href="%s">%s</a></p>\n' % (session['username'], session['username'])
+        else:
+            page += '<p><a href="/login">登录</a>  <a href="/signup">成为会员</a> <a href="/static/usr/instructions.html">使用说明</a></p>\n'
+        #page += '<p><img src="%s" width="400px" alt="advertisement"/></p>' % (get_random_image(path_prefix + 'static/img/'))
+        page += '<p><b>%s</b></p>' % (get_random_ads())
+        page += '<p>粘帖1篇文章 (English only)</p>'
+        page += '<form method="post" action="/">'
+        page += ' <textarea name="content" rows="10" cols="120"></textarea><br/>'
+        page += ' <input type="submit" value="get文章中的词频"/>'
+        page += ' <input type="reset" value="清除"/>'
+        page += '</form>'
+        d = load_freq_history(path_prefix + 'static/frequency/frequency.p')
+        if len(d) > 0:
+            page += '<p><b>最常见的词</b></p>'
+            for x in sort_in_descending_order(pickle_idea.dict2lst(d)):
+                if x[1] <= 99:
+                    break
+                page += '<a href="%s">%s</a> %d\n' % (youdao_link(x[0]), x[0], x[1])
+
+        page += '</body></html>'
+        return page
+
+
+@app.route("/<username>/mark", methods=['GET', 'POST'])
+def user_mark_word(username):
+    username = session[username]
+    user_freq_record = path_prefix + 'static/frequency/' +  'frequency_%s.pickle' % (username)
+    if request.method == 'POST':
+        d = load_freq_history(user_freq_record)
+        lst_history = pickle_idea2.dict2lst(d)
+        lst = []
+        for word in request.form.getlist('marked'):
+            lst.append((word, [get_time()]))
+        d = pickle_idea2.merge_frequency(lst, lst_history)
+        pickle_idea2.save_frequency_to_pickle(d, user_freq_record)
+        return redirect(url_for('userpage', username=username))
+    else:
+        return 'Under construction'
+
+
+
+@app.route("/<username>", methods=['GET', 'POST'])
+def userpage(username):
+    '''
+    Show the study room of the logged-in user.
+
+    GET renders today's article, a text area for collecting words and the
+    user's vocabulary book.  POST takes the submitted text (form field
+    'content'), counts its words and renders a check-box list whose form posts
+    to /<username>/mark.
+
+    Returns an HTML string.  Visitors who are not logged in get the login
+    prompt; accounts past their expiry date get the payment notice.
+
+    Note: apart from the expiry notice, the account shown is the one stored in
+    the session, not the <username> part of the URL.
+    '''
+    from html import escape  # local import: used to escape user-supplied text below
+
+    if not session.get('logged_in'):
+        return '<p>请先<a href="/login">登录</a>。</p>'
+
+    user_expiry_date = session.get('expiry_date')
+    if user_expiry_date is None:
+        # The session may carry no expiry date; look it up instead of comparing
+        # a string against None (which raises TypeError).
+        user_expiry_date = get_expiry_date(session.get('username'))
+        session['expiry_date'] = user_expiry_date
+    # Dates are compared as YYYYMMDD strings; a still-missing date is treated as expired.
+    if not user_expiry_date or datetime.now().strftime('%Y%m%d') > user_expiry_date:
+        return '<p>账号 %s 过期。</p><p>为了提高服务质量，English Pal 收取会员费用， 每天0元。</p> <p>请决定你要试用的时间长度，扫描下面支付宝二维码支付。 支付时请注明<i>English Pal Membership Fee</i>。 我们会于12小时内激活账号。</p><p><img src="static/donate-the-author-hidden.jpg" width="120px" alt="支付宝二维码" /></p><p>如果有问题，请加开发者微信 torontohui。</p> <p><a href="/logout">登出</a></p>' % (escape(username))
+
+    username = session.get('username')
+    shown_username = escape(str(username))  # HTML-safe form for interpolation into the page
+
+    user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
+
+    if request.method == 'POST':  # when we submit a form
+        content = request.form['content']
+        f = WordFreq(content)
+        lst = f.get_freq()
+        page = '<meta charset="UTF8">'
+        page += '<meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=0.5, maximum-scale=3.0, user-scalable=yes" />'
+        page += '<p>勾选不认识的单词</p>'
+        page += '<form method="post" action="/%s/mark">\n' % (shown_username)
+        page += ' <input type="submit" name="add-btn" value="加入我的生词簿"/>\n'
+        words_tests_dict = pickle_idea.load_record(path_prefix + 'static/words_and_tests.p')
+        for count, x in enumerate(lst, 1):
+            # x[0] comes from the submitted text, so escape it before it reaches the page.
+            shown_word = escape(x[0])
+            page += '<p><font color="grey">%d</font>: <a href="%s" title="%s">%s</a> (%d)  <input type="checkbox" name="marked" value="%s"></p>\n' % (count, escape(youdao_link(x[0])), appears_in_test(x[0], words_tests_dict), shown_word, x[1], shown_word)
+        page += '</form>\n'
+        return page
+
+    elif request.method == 'GET':  # when we load a html page
+        page = '<meta charset="UTF8">\n'
+        page += '<meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=0.5, maximum-scale=3.0, user-scalable=yes" />\n'
+        page += '<meta name="format-detection" content="telephone=no" />\n'  # forbid treating numbers as cell numbers in smart phones
+        page += '<title>EnglishPal Study Room for %s</title>' % (shown_username)
+        page += '<p><b>English Pal for <font color="red">%s</font></b> <a href="/logout">登出</a></p>' % (shown_username)
+        page += '<p><a href="/%s/reset">下一篇</a></p>' % (shown_username)
+        page += '<p><b>阅读文章并回答问题</b></p>\n'
+        # .get(): the session may have no articleID key; None is also the value login() stores.
+        page += '<div id="text-content">%s</div>' % (get_today_article(user_freq_record, session.get('articleID')))
+        page += '<p><b>收集生词吧</b> （可以在正文中划词，也可以复制黏贴）</p>'
+        page += '<form method="post" action="/%s">' % (shown_username)
+        page += ' <textarea name="content" id="selected-words" rows="10" cols="120"></textarea><br/>'
+        page += ' <input type="submit" value="get 所有词的频率"/>'
+        page += ' <input type="reset" value="清除"/>'
+        page += '</form>\n'
+        # Clicking or touching the article appends the current selection to the text area.
+        page += ''' 
+                 <script>
+                   function getWord(){ 
+                       var word = window.getSelection?window.getSelection():document.selection.createRange().text;
+                       return word;
+                   }
+                   function fillinWord(){
+                       var element = document.getElementById("selected-words");
+                       element.value = element.value + " " + getWord();
+                   }
+                   document.getElementById("text-content").addEventListener("click", fillinWord, false);
+                   document.getElementById("text-content").addEventListener("touchstart", fillinWord, false);
+                 </script>
+                 '''
+
+        d = load_freq_history(user_freq_record)
+        if len(d) > 0:
+            page += '<p><b>我的生词簿</b></p>'
+            lst = pickle_idea2.dict2lst(d)
+            # (word, number of recorded dates) pairs, shown most frequently marked first
+            lst2 = [(t[0], len(t[1])) for t in lst]
+            for x in sort_in_descending_order(lst2):
+                word = x[0]
+                freq = x[1]
+                link = escape(youdao_link(word))
+                shown_word = escape(word)
+                if isinstance(d[word], list):  # d[word] is a list of dates
+                    dates = escape('; '.join(d[word]))
+                    if freq > 1:
+                        page += '<p class="new-word"> <a href="%s">%s</a>                     (<a title="%s">%d</a>) </p>\n' % (link, shown_word, dates, freq)
+                    else:
+                        # words marked once get their count drawn in white
+                        page += '<p class="new-word"> <a href="%s">%s</a> <font color="white">(<a title="%s">%d</a>)</font> </p>\n' % (link, shown_word, dates, freq)
+                elif isinstance(d[word], int):  # d[word] is a frequency. to migrate from old format.
+                    page += '<a href="%s">%s</a>%d\n' % (link, shown_word, freq)
+
+        return page
+
+### Sign-up, login, logout ###
+@app.route("/signup", methods=['GET', 'POST'])
+def signup():
+    '''
+    Register a new account.
+
+    GET renders the sign-up form.  POST reads the 'username' and 'password'
+    form fields, rejects a taken user name (flash message + form) or a password
+    shorter than 4 characters after stripping, otherwise creates the user,
+    verifies the credentials and logs the user in.
+
+    Returns an HTML string / rendered template.
+    '''
+    from html import escape  # local import: used to escape the user name in the returned HTML
+
+    if request.method == 'GET':
+        return render_template('signup.html')
+    elif request.method == 'POST':
+        username = request.form['username']
+        password = request.form['password']
+
+        available = check_username_availability(username)
+        if not available:
+            flash('用户名 %s 已经被注册。' % (username))
+            return render_template('signup.html')
+        elif len(password.strip()) < 4:
+            return '密码过于简单。'
+        else:
+            add_user(username, password)
+            verified = verify_user(username, password)
+            if verified:
+                session['logged_in'] = True
+                session[username] = username
+                session['username'] = username
+                # Store the same keys as login(), so that the user page, which
+                # reads 'expiry_date' and 'articleID' from the session, works
+                # right after signing up.
+                session['expiry_date'] = get_expiry_date(username)
+                session['articleID'] = None
+                shown_username = escape(username)
+                return '<p>恭喜，你已成功注册， 你的用户名是 <a href="/%s">%s</a>。</p>\
+                <p><a href="/%s">开始使用</a> <a href="/">返回首页</a></p>' % (shown_username, shown_username, shown_username)
+            else:
+                return '用户名密码验证失败。'
+
+
+@app.route("/login", methods=['GET', 'POST'])
+def login():
+    '''
+    Log a user in.
+
+    GET: render the login form, or a short "already logged in" notice when the
+    session is marked as logged in.
+    POST: verify the submitted 'username'/'password' form fields; on success
+    fill the session and redirect to the user page, otherwise return an error
+    text.
+    '''
+    if request.method == 'GET':
+        if session.get('logged_in'):
+            return '你已登录 <a href="/%s">%s</a>。 登出点击<a href="/logout">这里</a>。' % (session['username'], session['username'])
+        return render_template('login.html')
+
+    if request.method == 'POST':
+        # check database and verify user
+        username = request.form['username']
+        password = request.form['password']
+        if not verify_user(username, password):
+            return '无法通过验证。'
+
+        session['logged_in'] = True
+        session[username] = username
+        session['username'] = username
+        session['expiry_date'] = get_expiry_date(username)
+        session['articleID'] = None  # no article chosen yet for this session
+        return redirect(url_for('userpage', username=username))
+
+
+@app.route("/logout", methods=['GET', 'POST'])
+def logout():
+    '''
+    Log the current user out and redirect to the main page.
+
+    Besides clearing the logged_in flag, the identity keys written by
+    login()/signup() are removed, so that a later request cannot reuse the
+    user name, expiry date or article id of the previous user.
+    '''
+    username = session.pop('username', None)
+    if username is not None:
+        session.pop(username, None)  # the session[<name>] = <name> marker
+    session.pop('expiry_date', None)
+    session.pop('articleID', None)
+    # Set last, so the flag is False even if the user name collided with this key.
+    session['logged_in'] = False
+    return redirect(url_for('mainpage'))
+
+
+if __name__ == '__main__':
+    # Entry point when the file is run directly (python3 main.py): start
+    # Flask's built-in server with debug mode on.
+    # NOTE(review): debug=True enables the interactive debugger; confirm that
+    # production deployments do not start the app through this branch.
+    #app.secret_key = os.urandom(16)
+    #app.run(debug=False, port='6000')
+    app.run(debug=True)        
+    #app.run(debug=True, port='6000')
+    #app.run(host='0.0.0.0', debug=True, port='6000')
+
--- a/app/pickle_idea.py
+++ b/app/pickle_idea.py
@ -0,0 +1,70 @@
+###########################################################################
+# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
+# Written permission must be obtained from the author for commercial uses.
+###########################################################################
+
+# Purpose: dictionary & pickle as a simple means of database.
+# Task: incorporate the functions into wordfreqCMD.py such that it will also show cumulative frequency.
+
+import pickle
+
+
+def lst2dict(lst, d):
+    '''
+    Accumulate the (word, frequency) pairs of lst into dictionary d.
+
+    A word that is new to d is stored with its frequency; a word that is
+    already present gets the frequency added to the stored value.
+    d is modified in place and nothing is returned.
+    '''
+    for entry in lst:
+        key, amount = entry[0], entry[1]
+        if key in d:
+            d[key] += amount
+        else:
+            d[key] = amount
+
+
+def dict2lst(d):
+    '''Return the content of dictionary d as a list of (key, value) tuples.'''
+    return [(key, value) for key, value in d.items()]
+        
+
+def merge_frequency(lst1, lst2):
+    '''
+    Combine two lists of (word, frequency) pairs into one new dictionary.
+
+    The pairs of lst1 are accumulated first, then those of lst2; frequencies of
+    a word occurring more than once are added up (see lst2dict).
+    '''
+    merged = {}
+    for pairs in (lst1, lst2):
+        lst2dict(pairs, merged)
+    return merged
+
+
+def load_record(pickle_fname):
+    '''
+    Load and return the object pickled in file pickle_fname.
+
+    Raises whatever open() or pickle.load() raise (e.g. FileNotFoundError).
+
+    NOTE(review): pickle.load can execute arbitrary code while loading; only
+    use this on files written by this application.
+    '''
+    # The with-statement closes the file even when unpickling fails.
+    with open(pickle_fname, 'rb') as f:
+        return pickle.load(f)
+
+
+def save_frequency_to_pickle(d, pickle_fname):
+    '''
+    Pickle a filtered copy of dictionary d into file pickle_fname.
+
+    Keys that are in the exclusion list, are numeric strings, or are shorter
+    than two characters are left out.  d itself is not modified.
+    '''
+    #exclusion_lst = ['one', 'no', 'has', 'had', 'do', 'that', 'have', 'by', 'not', 'but', 'we', 'this', 'my', 'him', 'so', 'or', 'as', 'are', 'it', 'from', 'with', 'be', 'can', 'for', 'an', 'if', 'who', 'whom', 'whose', 'which', 'the', 'to', 'a', 'of', 'and', 'you', 'i', 'he', 'she', 'they', 'me', 'was', 'were', 'is', 'in', 'at', 'on', 'their', 'his', 'her', 's', 'said', 'all', 'did', 'been', 'w']
+    exclusion_lst = []
+    # Build the filtered dictionary BEFORE opening the file: opening with 'wb'
+    # truncates it, so an error while filtering must not wipe the stored data.
+    d2 = {}
+    for k in d:
+        if k not in exclusion_lst and not k.isnumeric() and len(k) > 1:
+            d2[k] = d[k]
+    with open(pickle_fname, 'wb') as f:
+        pickle.dump(d2, f)
+
+
+
+if __name__ == '__main__':
+    # Demo: store two frequencies in frequency.p, load them back and merge
+    # them with a second list of frequencies.
+    database = 'frequency.p' # frequency.p is our database
+
+    d = {}
+    lst2dict([('apple',2),  ('banana',1)], d) # d will change
+    save_frequency_to_pickle(d, database)
+
+    history = dict2lst(load_record(database))
+    d = merge_frequency([('banana',2), ('orange', 4)], history)
+    print(d)
--- a/app/pickle_idea2.py
+++ b/app/pickle_idea2.py
@ -0,0 +1,80 @@
+###########################################################################
+# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
+# Written permission must be obtained from the author for commercial uses.
+###########################################################################
+
+
+# Purpose: dictionary & pickle as a simple means of database.
+# Task: incorporate the functions into wordfreqCMD.py such that it will also show cumulative frequency.
+# Note: unlike pickle_idea.py, the second item is now not a frequency, but a list of dates.
+
+import pickle
+from datetime import datetime
+
+def lst2dict(lst, d):
+    '''
+    Accumulate the (word, dates) pairs of lst into dictionary d.
+
+    dates is a list; a word that is new to d is stored with a copy of its list,
+    a word that is already present gets the dates appended to the stored list.
+    d is modified in place and nothing is returned.  The lists inside lst are
+    left untouched.
+    '''
+    for x in lst:
+        word = x[0]
+        dates = x[1]
+        if word not in d:
+            # Copy: storing the caller's list itself would let the += below
+            # extend that list in place when the word shows up again.
+            d[word] = list(dates)
+        else:
+            d[word] += dates
+
+
+def dict2lst(d):
+    '''
+    Convert dictionary d into a list of (word, dates) tuples.
+
+    The type of the first value decides the format of the whole dictionary:
+    - int (old format, word -> frequency): every word is paired with a
+      one-element list holding the current time as YYYYmmddHHMM;
+    - list (word -> list of dates): the items are returned as they are.
+    An empty dictionary, or any other value type, gives an empty list.
+    '''
+    if not len(d) > 0:
+        return []
+
+    first_value = d[next(iter(d))]
+    if isinstance(first_value, int):
+        return [(word, [datetime.now().strftime('%Y%m%d%H%M')]) for word in d]
+    if isinstance(first_value, list):
+        return list(d.items()) # a list of (key, value) pairs
+    return []
+
+def merge_frequency(lst1, lst2):
+    '''
+    Combine two lists of (word, dates) pairs into one new dictionary.
+
+    The pairs of lst1 are accumulated first, then those of lst2 (see lst2dict).
+    '''
+    merged = {}
+    for pairs in (lst1, lst2):
+        lst2dict(pairs, merged)
+    return merged
+
+
+def load_record(pickle_fname):
+    '''
+    Load and return the object pickled in file pickle_fname.
+
+    Raises whatever open() or pickle.load() raise (e.g. FileNotFoundError).
+
+    NOTE(review): pickle.load can execute arbitrary code while loading; only
+    use this on files written by this application.
+    '''
+    # The with-statement closes the file even when unpickling fails.
+    with open(pickle_fname, 'rb') as f:
+        return pickle.load(f)
+
+
+def save_frequency_to_pickle(d, pickle_fname):
+    '''
+    Pickle a cleaned-up copy of dictionary d (word -> list of dates) into file
+    pickle_fname.
+
+    Words in the exclusion list, numeric strings and words shorter than two
+    characters are left out; the dates of every kept word are de-duplicated and
+    sorted.  d itself is not modified.
+    '''
+    exclusion_lst = ['one', 'no', 'has', 'had', 'do', 'that', 'have', 'by', 'not', 'but', 'we', 'this', 'my', 'him', 'so', 'or', 'as', 'are', 'it', 'from', 'with', 'be', 'can', 'for', 'an', 'if', 'who', 'whom', 'whose', 'which', 'the', 'to', 'a', 'of', 'and', 'you', 'i', 'he', 'she', 'they', 'me', 'was', 'were', 'is', 'in', 'at', 'on', 'their', 'his', 'her', 's', 'said', 'all', 'did', 'been', 'w']
+    # Build the cleaned dictionary BEFORE opening the file: opening with 'wb'
+    # truncates it, so an error while filtering must not wipe the stored data.
+    d2 = {}
+    for k in d:
+        if k not in exclusion_lst and not k.isnumeric() and len(k) >= 2:
+            d2[k] = sorted(set(d[k]))
+    with open(pickle_fname, 'wb') as f:
+        pickle.dump(d2, f)
+
+
+
+if __name__ == '__main__':
+    # Demo: store two words with their dates in frequency.p, load them back and
+    # merge them with a second list of words and dates.
+    database = 'frequency.p' # frequency.p is our database
+
+    d = {}
+    lst2dict([('apple',['201910251437', '201910251438']),  ('banana',['201910251439'])], d) # d will change
+    save_frequency_to_pickle(d, database)
+
+    history = dict2lst(load_record(database))
+    d = merge_frequency([('banana',['201910251439']), ('orange', ['201910251440', '201910251439'])], history)
+    print(d)
--- a/app/static/usr/instructions.html
+++ b/app/static/usr/instructions.html
@ -0,0 +1,86 @@
+<!DOCTYPE html>
+<html lang="zh"><head>
+    <meta http-equiv="content-type" content="text/html; charset=UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=0.5, maximum-scale=3.0, user-scalable=yes" />    
+    <meta charset="utf-8"> 
+    <title>怎么用English Pal</title> 
+    <style>
+      li { margin: 3px 0; }
+      ul {list-style-type: none;}
+      body {font-size: 100%;}
+      * {font-family:SimSun !important;}
+
+      /* http://webtricksandtreats.com/table-style-css/ */
+      table.table-style-one {
+	  font-family: verdana,arial,sans-serif;
+	  font-size:14px;
+	  color:#333333;
+	  border-width: 1px;
+	  border-color: #3A3A3A;
+	  border-collapse: collapse;
+      }
+      table.table-style-one th {
+	  border-width: 1px;
+	  padding: 8px;
+	  border-style: solid;
+	  border-color: #3A3A3A;
+	  background-color: #F0F8FF;
+      }
+      table.table-style-one td {
+	  border-width: 1px;
+	  padding: 8px;
+	  border-style: solid;
+	  border-color: #3A3A3A;
+	  background-color: #ffffff;
+      }      
+    </style>
+  </head> 
+
+  <body>
+
+    <p>本软件的宗旨是：珍惜你的时间， 提高你的获取英文信息的速度与准确度。 不管你是英语爱好者，备考的学生，还是英语老师，都能从 English Pal 中发现用处。</p>
+
+    <p>如果你教英语，English Pal 可以帮你掌握题目词汇规律， 提高教学质量。</p>
+
+    <p>如果你学英语，English Pal 可以帮你迅速提高词汇， 轻松应对各种考试。 </p>
+
+    <p>1秒内闪电查词，告别字典。 私人定制的单词簿，永久相伴，记录奋斗岁月。 </p>    
+
+    <p>现在就<a href="/signup">试试看</a>吧。 English Pal 期待你的捷报， English Pal 期待你的<a href="./20161113-christ-piece-cambridge.jpg">远航</a>。</p>
+
+    <h2>使用方法</h2>
+    
+    <p>在EnglishPal主页点击<a href="/signup">成为会员</a>链接。支付会员费后即可开始使用。</p>
+
+    <p>活跃会员还有机会获得英文阅读10分钟一对一指导。</p>
+    
+    <table class="table-style-one">
+      <thead>	  
+	<tr>
+	  <th>截图</th>
+	  <th>说明</th>
+	</tr>
+      </thead>
+      <tbody>
+	<tr>
+	  <td><a href="./EnglishPal-screenshot-01.jpg"><img src="./EnglishPal-screenshot-01.jpg" width="300px"/></a></td>
+	  <td>精选短文，让你窥见世界。 统计词频，让你掌握规律。</td>
+	</tr>
+
+	<tr>
+	  <td><a href="./EnglishPal-screenshot-02.jpg"><img src="./EnglishPal-screenshot-02.jpg" width="300px"/></a></td>
+	  <td>时间无价，个性化的生词簿，为你节省记背单词时间。</td>
+	</tr>
+
+	<tr>
+	  <td><a href="./EnglishPal-screenshot-03.jpg"><img src="./EnglishPal-screenshot-03.jpg" width="300px"/></a></td>
+	  <td>考试人人都怕，单词的考试分类，让你目的明确。</td>
+	</tr>
+	
+      </tbody>
+    </table>
+    
+
+  </body>
+
+</html>
--- a/app/static/wordfreqapp_schema.sql
+++ b/app/static/wordfreqapp_schema.sql
@ -0,0 +1,10 @@
+-- Schema of the EnglishPal SQLite database.
+-- user: one row per account; the dates are stored as TEXT.
+-- NOTE(review): password is a plain TEXT column -- confirm how passwords are stored.
+CREATE TABLE user(name TEXT PRIMARY KEY, password TEXT, start_date TEXT, expiry_date TEXT);
+-- The internal sqlite_sequence table is not created here: SQLite creates it
+-- automatically for the AUTOINCREMENT column below, and names starting with
+-- "sqlite_" are reserved, so an explicit CREATE TABLE for it fails.
+-- article: the reading material.
+CREATE TABLE IF NOT EXISTS "article" (
+	"article_id"	INTEGER PRIMARY KEY AUTOINCREMENT,
+	"text"	TEXT,
+	"source"	TEXT,
+	"date"	TEXT,
+	"level"	TEXT,
+	"question"	TEXT
+);
--- a/app/static/words_and_tests.p
+++ b/app/static/words_and_tests.p
--- a/app/templates/login.html
+++ b/app/templates/login.html
@ -0,0 +1,15 @@
+{# Login page: a notice for logged-in sessions, otherwise the form that posts to /login. #}
+{% block body %}
+{% if session['logged_in'] %}
+
+You're logged in already!
+
+{% else %}
+<meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=0.5, maximum-scale=3.0, user-scalable=yes" />
+<form action="/login" method="POST">
+  {# "username" is not a valid input type; "text" is what browsers fall back to. #}
+  <p><input type="text" name="username" placeholder="邮箱地址、电话号码"></p>
+  <p><input type="password" name="password" placeholder="密码"></p>
+  <p><input type="submit" value="登录"></p>
+</form>
+{% endif %}
+{% endblock %}
+
--- a/app/templates/signup.html
+++ b/app/templates/signup.html
@ -0,0 +1,19 @@
+{# Sign-up page: a notice for logged-in sessions, otherwise flashed messages and the form that posts to /signup. #}
+{% block body %}
+{% if session['logged_in'] %}
+
+You're logged in already! <a href="/logout">Logout</a>.
+
+{% else %}
+<meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=0.5, maximum-scale=3.0, user-scalable=yes" />
+{# Flashed messages contain the user-supplied name, so they are rendered without "safe" and stay HTML-escaped. #}
+{% for message in get_flashed_messages() %}
+<p>{{ message }}</p>
+{% endfor %}
+
+<p>Sign up here.</p>
+
+<form action="/signup" method="POST">
+  {# "username" is not a valid input type; "text" is what browsers fall back to. #}
+  <p><input type="text" name="username" placeholder="邮箱地址、电话号码"></p>
+  <p><input type="password" name="password" placeholder="密码"></p>
+  <p><input type="submit" value="注册"></p>
+</form>
+{% endif %}
+{% endblock %}
+
--- a/app/test/test_add_word.py
+++ b/app/test/test_add_word.py
@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+# Run the docker image using the following command:
+# docker run -d -p 4444:4444 selenium/standalone-chrome
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+
+import random, time
+import string
+
+# Browser session shared by this module: a remote Chrome driven through the
+# Selenium server started with the docker command above.
+driver = webdriver.Remote('http://localhost:4444/wd/hub', DesiredCapabilities.CHROME)
+driver.implicitly_wait(10)  # wait up to 10 seconds when looking up elements
+
+# Base URL of the EnglishPal deployment under test.
+HOME_PAGE = 'http://121.4.94.30:91/'
+
+
+def has_punctuation(s):
+    '''Return True if string s contains at least one ASCII punctuation character.'''
+    return any(c in string.punctuation for c in s)
+    
+def test_add_word():
+    '''
+    End-to-end check: a word marked as unknown shows up in the vocabulary book.
+
+    Logs in, picks a random word from today's article, submits it for counting,
+    ticks every check box, adds the words to the vocabulary book and looks for
+    the word among the "new-word" entries of the user page.
+    '''
+    try:
+        driver.get(HOME_PAGE)
+        assert 'English Pal -' in driver.page_source
+
+        # login
+        elem = driver.find_element_by_link_text('登录')
+        elem.click()
+
+        # NOTE(review): real-looking credentials are hard-coded here.
+        uname = 'lanhui'
+        password = 'l0ve1t'
+        elem = driver.find_element_by_name('username')
+        elem.send_keys(uname)
+
+        elem = driver.find_element_by_name('password')
+        elem.send_keys(password)
+
+        elem = driver.find_element_by_xpath('//form[1]/p[3]/input[1]') # the login button
+        elem.click()
+
+        assert 'EnglishPal Study Room for ' + uname in  driver.title
+
+        # get essay content
+        elem = driver.find_element_by_id('text-content')
+        essay_content = elem.text
+
+        # Candidate words: at least 5 characters and free of punctuation (this
+        # also rules out tag leftovers such as 'font>', 'br>' and 'p>', because
+        # '>' is punctuation).  Filtering first, instead of re-drawing until a
+        # word fits, cannot loop forever when the essay has no suitable word.
+        candidates = [w for w in essay_content.split() if len(w) >= 5 and not has_punctuation(w)]
+        assert candidates, 'the essay contains no suitable word'
+        word = random.choice(candidates)
+
+        elem = driver.find_element_by_id('selected-words')
+        elem.send_keys(word)
+
+        elem = driver.find_element_by_xpath('//form[1]//input[1]') # the "get frequency of all words" button
+        elem.click()
+
+        elems = driver.find_elements_by_xpath("//input[@type='checkbox']")
+        for elem in elems:
+            if elem.get_attribute('name') == 'marked':
+                elem.click()
+
+        elem = driver.find_element_by_name('add-btn') # the "add to my vocabulary book" button
+        elem.click()
+
+        driver.refresh()
+        driver.refresh()
+        driver.refresh()
+        elems = driver.find_elements_by_xpath("//p[@class='new-word']/a")
+
+        assert any(word in elem.text for elem in elems)
+    finally:
+        driver.quit()
--- a/app/test/test_add_word_and_essay_does_not_change.py
+++ b/app/test/test_add_word_and_essay_does_not_change.py
@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+# Run the docker image using the following command:
+# docker run -d -p 4444:4444 selenium/standalone-chrome
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+
+import random, time
+import string
+
+# Browser session shared by this module: a remote Chrome driven through the
+# Selenium server started with the docker command above.
+driver = webdriver.Remote('http://localhost:4444/wd/hub', DesiredCapabilities.CHROME)
+driver.implicitly_wait(10)  # wait up to 10 seconds when looking up elements
+
+# Base URL of the EnglishPal deployment under test.
+HOME_PAGE = 'http://121.4.94.30:91/'
+
+
+def has_punctuation(s):
+    '''Return True if string s contains at least one ASCII punctuation character.'''
+    return any(c in string.punctuation for c in s)
+    
+def test_add_word_and_essay_does_not_change():
+    '''
+    End-to-end check: adding a word keeps the current essay, "next" changes it.
+
+    Logs in, adds a random word of the essay to the vocabulary book and
+    compares the essay text before and after; then follows the "next essay"
+    link and expects a different text.
+    '''
+    try:
+        driver.get(HOME_PAGE)
+        assert 'English Pal -' in driver.page_source
+
+        # login
+        elem = driver.find_element_by_link_text('登录')
+        elem.click()
+
+        # NOTE(review): real-looking credentials are hard-coded here.
+        uname = 'lanhui'
+        password = 'l0ve1t'
+        elem = driver.find_element_by_name('username')
+        elem.send_keys(uname)
+
+        elem = driver.find_element_by_name('password')
+        elem.send_keys(password)
+
+        elem = driver.find_element_by_xpath('//form[1]/p[3]/input[1]') # the login button
+        elem.click()
+
+        assert 'EnglishPal Study Room for ' + uname in  driver.title
+
+        # get essay content
+        driver.save_screenshot('./app/test/test_add_word_and_essay_does_not_change_pic0.png')
+        elem = driver.find_element_by_id('text-content')
+        essay_content = elem.text
+
+        # Candidate words: at least 5 characters and free of punctuation (this
+        # also rules out tag leftovers such as 'font>', 'br>' and 'p>', because
+        # '>' is punctuation).  Filtering first, instead of re-drawing until a
+        # word fits, cannot loop forever when the essay has no suitable word.
+        candidates = [w for w in essay_content.split() if len(w) >= 5 and not has_punctuation(w)]
+        assert candidates, 'the essay contains no suitable word'
+        word = random.choice(candidates)
+
+        elem = driver.find_element_by_id('selected-words')
+        elem.send_keys(word)
+        elem = driver.find_element_by_xpath('//form[1]//input[1]') # the "get frequency of all words" button
+        elem.click()
+
+        elems = driver.find_elements_by_xpath("//input[@type='checkbox']")
+        for elem in elems:
+            if elem.get_attribute('name') == 'marked':
+                elem.click()
+
+        elem = driver.find_element_by_xpath('//form[1]/input[1]') # the "add to my vocabulary book" button
+        elem.click()
+
+        # get essay content again
+        driver.save_screenshot('./app/test/test_add_word_and_essay_does_not_change_pic1.png')
+        elem = driver.find_element_by_id('text-content')
+        current_essay_content = elem.text
+
+        # Compare from the marker 'for you.' onwards (presumably the end of an
+        # introductory sentence before the essay -- TODO confirm).  find()
+        # returns -1 when the marker is missing, which would reduce the
+        # comparison to the last character; compare the whole text instead.
+        index = max(current_essay_content.find('for you.'), 0)
+        assert current_essay_content[index:] == essay_content[index:]
+
+        # click the Next button. Now the essay should change.
+        elem = driver.find_element_by_link_text('下一篇') # the "next essay" link
+        elem.click()
+
+        # compare again
+        driver.save_screenshot('./app/test/test_add_word_and_essay_does_not_change_pic2.png')
+        elem = driver.find_element_by_id('text-content')
+        next_essay_content = elem.text
+
+        assert current_essay_content[index:] != next_essay_content[index:]
+    finally:
+        driver.quit()
--- a/app/test/test_login.py
+++ b/app/test/test_login.py
@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+# Run the docker image using the following command:
+# docker run -d -p 4444:4444 selenium/standalone-chrome
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+
+import random, string
+
+# Browser session shared by this module: a remote Chrome driven through the
+# Selenium server started with the docker command above.
+driver = webdriver.Remote('http://localhost:4444/wd/hub', DesiredCapabilities.CHROME)
+driver.implicitly_wait(10)  # wait up to 10 seconds when looking up elements
+
+# Base URL of the EnglishPal deployment under test.
+HOME_PAGE = 'http://121.4.94.30:91/'
+
+
+
+def test_login():
+    '''
+    End-to-end check: a freshly registered user can log out and log in again.
+
+    Signs up with a random 8-letter user name, logs out, logs in with the same
+    credentials and expects the title of the user's study room.  A screenshot
+    is saved after each step.
+    '''
+    try:
+        driver.get(HOME_PAGE)
+        driver.save_screenshot('./app/test/test_login_pic0.png')
+        
+        assert 'English Pal -' in driver.page_source
+    
+        # sign up: follow the "become a member" link
+        elem = driver.find_element_by_link_text('成为会员')
+        elem.click()
+    
+        # a random user name, so that repeated runs do not collide
+        uname = ''.join ( [random.choice (string.ascii_letters) for x in range (8)] )
+        elem = driver.find_element_by_name('username')
+        elem.send_keys(uname)
+    
+        elem = driver.find_element_by_name('password')
+        elem.send_keys('iamc00l!')
+    
+        driver.save_screenshot('./app/test/test_login_pic1.png')
+        
+        elem = driver.find_element_by_xpath('//form[1]/p[3]/input[1]') # the sign-up button
+        elem.click()
+    
+        driver.save_screenshot('./app/test/test_login_pic2.png')
+        
+        assert '恭喜，你已成功注册' in driver.page_source
+        assert uname in driver.page_source
+    
+        # logout
+        driver.get(HOME_PAGE + 'logout')
+        driver.save_screenshot('./app/test/test_login_pic3.png')
+        
+        # login
+        elem = driver.find_element_by_link_text('登录')
+        elem.click()
+    
+        elem = driver.find_element_by_name('username')
+        elem.send_keys(uname)
+    
+        elem = driver.find_element_by_name('password')
+        elem.send_keys('iamc00l!')
+        
+        elem = driver.find_element_by_xpath('//form[1]/p[3]/input[1]') # the login button
+        elem.click()
+    
+        driver.save_screenshot('./app/test/test_login_pic4.png')    
+        assert 'EnglishPal Study Room for ' + uname in  driver.title
+    finally:
+        driver.quit()
--- a/app/test/test_next_essay.py
+++ b/app/test/test_next_essay.py
@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+# Run the docker image using the following command:
+# docker run -d -p 4444:4444 selenium/standalone-chrome
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+
+import random, string, time
+
+# Browser session shared by this module: a remote Chrome driven through the
+# Selenium server started with the docker command above.
+driver = webdriver.Remote('http://localhost:4444/wd/hub', DesiredCapabilities.CHROME)
+driver.implicitly_wait(10)  # wait up to 10 seconds when looking up elements
+
+# Base URL of the EnglishPal deployment under test.
+HOME_PAGE = 'http://121.4.94.30:91/'
+
+
+
+def test_next():
+    '''
+    End-to-end check: the "next essay" link eventually shows a different essay.
+
+    Logs in, remembers the essay text, follows the link up to three times and
+    expects the text to differ from the first one at least once.
+    '''
+    try:
+        driver.get(HOME_PAGE)
+        assert 'English Pal -' in driver.page_source
+
+        # login
+        elem = driver.find_element_by_link_text('登录')
+        elem.click()
+
+        # NOTE(review): real-looking credentials are hard-coded here.
+        uname = 'lanhui'
+        password = 'l0ve1t'
+        elem = driver.find_element_by_name('username')
+        elem.send_keys(uname)
+
+        elem = driver.find_element_by_name('password')
+        elem.send_keys(password)
+
+        elem = driver.find_element_by_xpath('//form[1]/p[3]/input[1]') # the login button
+        elem.click()
+
+        assert 'EnglishPal Study Room for ' + uname in  driver.title
+
+        # get essay content
+        driver.save_screenshot('./app/test/test_next_essay_pic0.png')
+        elem = driver.find_element_by_id('text-content')
+        essay_content = elem.text
+
+        # click Next
+        # One flag name for initialisation, assignment and assertion: with two
+        # different names an unchanged essay raised NameError instead of
+        # failing the assertion.
+        differ = 0
+        for i in range(3):
+            elem = driver.find_element_by_link_text('下一篇')
+            elem.click()
+            driver.save_screenshot('./app/test/test_next_essay_pic1.png')
+            elem = driver.find_element_by_id('text-content')
+            current_essay_content = elem.text
+
+            if current_essay_content != essay_content:
+                differ = 1
+                break
+
+        assert differ == 1
+    finally:
+        driver.quit()
+
--- a/app/test/test_signup.py
+++ b/app/test/test_signup.py
@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+# Run the docker image using the following command:
+# docker run -d -p 4444:4444 selenium/standalone-chrome
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+
+import random, string
+
+# Browser session shared by this module: a remote Chrome driven through the
+# Selenium server started with the docker command above.
+driver = webdriver.Remote('http://localhost:4444/wd/hub', DesiredCapabilities.CHROME)
+driver.implicitly_wait(10)  # wait up to 10 seconds when looking up elements
+
+# Base URL of the EnglishPal deployment under test.
+HOME_PAGE = 'http://121.4.94.30:91/'
+
+
+
+def test_signup():
+    '''
+    End-to-end check: signing up with a new user name succeeds.
+
+    Registers a random 8-letter user name and expects the congratulation page
+    to mention it.  A screenshot is saved after each step.
+    '''
+    try:
+        driver.get(HOME_PAGE)
+        driver.save_screenshot('test_signup_pic0.png')
+        
+        assert 'English Pal -' in driver.page_source
+    
+        # follow the "become a member" link
+        elem = driver.find_element_by_link_text('成为会员')
+        elem.click()
+    
+        # a random user name, so that repeated runs do not collide
+        uname = ''.join ( [random.choice (string.ascii_letters) for x in range (8)] )
+        elem = driver.find_element_by_name('username')
+        elem.send_keys(uname)
+    
+        elem = driver.find_element_by_name('password')
+        elem.send_keys('iamc00l!')
+    
+        driver.save_screenshot('test_signup_pic1.png')
+        
+        elem = driver.find_element_by_xpath('//form[1]/p[3]/input[1]') # the sign-up button
+        elem.click()
+    
+        driver.save_screenshot('test_signup_pic2.png')
+        
+        assert '恭喜，你已成功注册' in driver.page_source
+        assert uname in driver.page_source
+    finally:
+        driver.quit()
+
+
--- a/app/wordfreqCMD.py
+++ b/app/wordfreqCMD.py
@ -0,0 +1,123 @@
+###########################################################################
+# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
+# Written permission must be obtained from the author for commercial uses.
+###########################################################################
+
+import collections
+import string
+import operator
+import os, sys # 引入模块sys，因为我要用里面的sys.argv列表中的信息来读取命令行参数。
+import pickle_idea
+
+def freq(fruit):
+    '''
+    Purpose: count how often each word occurs in a string.
+    Input:   a string.
+    Output:  a list of (word, frequency) tuples, most frequent word first,
+             for example [('apple', 2), ('banana', 1)].
+    Note:    the string is converted to lower case first, so that 'Apple' and
+             'apple' count as the same word.
+    '''
+    words = fruit.lower().split()  # lower-case, then split on whitespace
+    return collections.Counter(words).most_common()
+
+
+def youdao_link(s):
+    '''Return the URL of the Youdao dictionary page for word s.'''
+    return ''.join(('http://youdao.com/w/eng/', s, '/#keyfrom=dict2.index'))
+
+
+def file2str(fname):
+    '''
+    Return the whole content of text file fname as a string.
+
+    The file is opened with the platform's default encoding.
+    '''
+    # The with-statement closes the file even when reading fails.
+    with open(fname) as f:
+        return f.read()
+
+
+def remove_punctuation(s):
+    '''
+    Return string s with punctuation removed.
+
+    Every special character and every double hyphen is replaced by a space
+    (a space, so that 'apple,apple' does not become 'appleapple'), and
+    surrounding whitespace is stripped.  A single quote is kept only when it
+    has an ASCII letter on both sides, as in "don't".
+    '''
+    special_characters = '_©~=+[]*&$%^@.,?!:;#()"“”—‘’'
+    blanked = s.translate(str.maketrans(special_characters, ' ' * len(special_characters)))
+    cleaned = blanked.replace('--', ' ').strip()
+
+    if '\'' not in cleaned:
+        return cleaned
+
+    last = len(cleaned) - 1
+    kept = []  # the characters to keep
+    for pos, ch in enumerate(cleaned):
+        if ch != '\'':
+            kept.append(ch)
+        elif 0 < pos < last and cleaned[pos - 1] in string.ascii_letters and cleaned[pos + 1] in string.ascii_letters:
+            kept.append(ch)
+    return ''.join(kept)
+
+
+def sort_in_descending_order(lst):
+    '''
+    Return a new list with the (word, frequency) pairs of lst sorted by
+    frequency, highest first; equal frequencies are ordered by word, also in
+    descending order.
+    '''
+    ordered = list(lst)
+    ordered.sort(key=operator.itemgetter(1, 0), reverse=True)
+    return ordered
+
+
+def sort_in_ascending_order(lst):
+    '''
+    Return a new list with the (word, frequency) pairs of lst sorted by
+    frequency, lowest first; equal frequencies are ordered by word, also in
+    ascending order.
+    '''
+    ordered = list(lst)
+    ordered.sort(key=operator.itemgetter(1, 0), reverse=False)
+    return ordered
+
+
+def make_html_page(lst, fname):
+    '''
+    Purpose: write the (word, frequency) pairs of lst to file fname as HTML.
+
+    Every pair becomes one numbered paragraph that links the word to its
+    dictionary page, followed by the frequency in parentheses.
+    '''
+    # Build the page before opening the file: opening with 'w' truncates it,
+    # so a malformed item in lst must not leave an empty file behind.
+    paragraphs = [
+        # <a href="">word</a>
+        '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1])
+        for count, x in enumerate(lst, 1)
+    ]
+    # The with-statement closes the file even when writing fails.
+    with open(fname, 'w') as f:
+        f.write(''.join(paragraphs))
+
+
+## main (program entry point)
+if __name__ == '__main__':
+    num = len(sys.argv)
+
+    if num == 1: # read the text from the keyboard
+        s = input()
+    elif num == 2: # read the text from the file named on the command line
+        fname = sys.argv[1]
+        s = file2str(fname)
+    else:
+        print('I can accept at most 2 arguments.')
+        sys.exit() # stop here; the code below is not executed
+
+    s = remove_punctuation(s) # here s is an argument and carries a value
+    L = freq(s)
+    for x in sort_in_descending_order(L):
+        print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0]))) # word, frequency, dictionary link
+
+    # write the frequencies to result.html
+    make_html_page(sort_in_descending_order(L), 'result.html') 
+
+    print('\nHistory:\n')
+    if os.path.exists('frequency.p'):
+        d = pickle_idea.load_record('frequency.p')
+    else:
+        d = {}
+
+    print(sort_in_descending_order(pickle_idea.dict2lst(d)))
+
+    # merge the new frequencies with the stored history and save the result
+    lst_history = pickle_idea.dict2lst(d)
+    d = pickle_idea.merge_frequency(L, lst_history)
+    pickle_idea.save_frequency_to_pickle(d, 'frequency.p')
+
+
+