From 4ce937568716177d90ffbdde3c8ad1e5e935bc1d Mon Sep 17 00:00:00 2001 From: Hui Lan Date: Sun, 31 Mar 2019 22:50:14 +0800 Subject: updated lecture notes --- LectureNotesOnPython.html | 398 +++++++++++++++++++++++++++++++++++----------- LectureNotesOnPython.rst | 391 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 628 insertions(+), 161 deletions(-) diff --git a/LectureNotesOnPython.html b/LectureNotesOnPython.html index 966a1b5..34d3462 100644 --- a/LectureNotesOnPython.html +++ b/LectureNotesOnPython.html @@ -377,41 +377,48 @@ ul.auto-toc {

内容目录

-

Python的发音纠正

+

Python的发音纠正

国人普遍把th发作s。 Not quite correct。

ˈpī-ˌthän , -thən pronunciation

-

Python源流

+

Python源流

Python之父Guido van Rossum,荷兰人,1956年生,1982年在阿姆斯特丹大学获得数学与计算机科学硕士学位。有过ABC语言的工作经验。1989年设计了Python语言。

@@ -461,7 +468,7 @@ ul.auto-toc {
-

Python的关键词

+

Python的关键词

def pass
from import
@@ -483,7 +490,7 @@ ul.auto-toc {

关键词被语言留用(reserved),无法作变量名。

-

值的类型

+

值的类型

所有的值都是对象。a = 5, help(a) a.bit_length()

数字。1, 1.,1.1, .1, 1e1, 1e-1, 1E1, 1E-1

@@ -503,7 +510,7 @@ A list of objects

元组(tuple),字典(dict)。

-

变量(Variable)

+

变量(Variable)

是一个名字(name),是指向一个值(value)的名字。

值存放在内存(memory)中的某个地址。

尽量选有意义的简短的名字。比如,代表个数用n,代表索引用i,j,k。

@@ -592,7 +599,7 @@ A list of objects
-

可变(mutable)类型与不可变类型

+

可变(mutable)类型与不可变类型

字符串是不可变的(immutable)类型,不能在原内存地址改变。

a = 'hello' 不可以原地修改a[0] = 'H'。需要修改a的值时,需要对a进行重新赋值a = 'Hello'。

列表是可变(mutable)类型,能在原内存地址改变。

@@ -617,7 +624,8 @@ A list of objects
-

数与格式化显示

+

数与格式化显示

+
x = 3.1415926
@@ -661,9 +669,10 @@ A list of objects
print('%f' % (x))
+
-

字符串(Strings)

+

字符串(Strings)

由字符组成。

fruit = 'banana!'
@@ -719,7 +728,7 @@ A list of objects

以上 # [start,stop,step] 代表注释(comment),注释以 # 号开头。

-

字符串相加(concatenation)

+

字符串相加(concatenation)

输出Jack, Kack, Lack, Mack, Nack, Ouack, Pack, and Quack

prefixes = 'JKLMNOPQ'
@@ -738,7 +747,7 @@ A list of objects
-

子串(slice)

+

子串(slice)

s[n:m],其中n或m可省略。 包括第n个字符,不包括第m个字符。(索引自0开始)

@@ -762,7 +771,7 @@ A list of objects
-

搜索字符串

+

搜索字符串

def find(word, c):
@@ -788,7 +797,7 @@ A list of objects

练习:用上面三参数的find来做。

-

String类(对象)方法

+

String类(对象)方法

upper()
lower()
@@ -801,14 +810,14 @@ A list of objects
-

in操作符

+

in操作符

'a' in 'banana' 'seed' in 'banana'

练习:写出下面的函数,使得 in_both('apples', 'oranges')返回'aes'

-

字符串比较

+

字符串比较

字典序(alphabetical order)。大写字母排在小写字母前。

字符串之间可以有以下对比操作:

@@ -824,7 +833,7 @@ in_both('apples', 'oranges')返回'aes'

即兴定义函数,制造一个长度不小于4的密码。

-

列表

+

列表

语言的内置(built-in)类型。注意与String类比,index也是从0开始, in操作符, 求长度,获得切片(slice),遍历操作类似。

@@ -865,7 +874,7 @@ in_both('apples', 'oranges')返回'aes'

return 2*x

list(map(f, [1,2]))

-

filter方法,从几个值中选择符合条件的几个值

+

filter方法,从几个值中选择符合条件的几个值。

def f(x):
@@ -942,7 +951,7 @@ a与b是指向[1,2,3]的两个references。 error-prone(易错)

-

列表作为参数

+

列表作为参数

def delete_head(t):
@@ -958,7 +967,7 @@ error-prone(易错)

-

注意区别 append 与 + 操作符

+

注意区别 append 与 + 操作符

t1 = [1, 2]
@@ -985,18 +994,53 @@ error-prone(易错)

-

TDD - Test-driven Development

+

TDD - Test-driven Development

测试驱动开发。 My favourite。 刺激有挑战性。 帮助厘清需求。 帮助编写代码。

-

推荐使用pytest。如何安装? 使用命令 pip install pytest

+

推荐使用pytest。如何安装? 使用命令 pip install pytest。

+

在 test_cases.py 写如下测试用例。然后在命令行运行: python -m pytest test_cases.py 。

+
+# Copyright (c) Hui Lan 2019
+
+import random
+import string
+
+def make_password(n):
+    '''
+    Return a string of length n consisting of a combination of
+    letters, digits and special characters.  Note that each password
+    must have at least one lower case letter, one upper case letter,
+    one digit and one special character.  Return an empty string if n
+    is less than 4.
+    '''
+
+    if n < 4:
+        return ''
+
+    password = random.choice(string.ascii_lowercase) + \
+        random.choice(string.ascii_uppercase) + \
+        random.choice(string.digits) + \
+        random.choice(string.punctuation) + \
+        ''.join([random.choice(string.ascii_letters + string.digits + string.punctuation) for i in range(n-4)])
+
+    return ''.join(random.sample(password, n)) # shuffle password then return
+
+
+
+
+if __name__ == '__main__':
+    for n in range(0,20):
+        pwd = make_password(n)
+        print(pwd)
+
-

计算复杂度

+

计算复杂度

用Big O表述复杂度。O(n), O(n^2), O(n^3)。

密码实验回顾。

-

字典(Dictionary)

+

字典(Dictionary)

Mutable数据类型。

实际开发中超级有用。

@@ -1020,66 +1064,228 @@ error-prone(易错)

key-value pair (item)

在 Python 3.7 之前,语言不保证item的顺序(不一定按照创建时的顺序);自 Python 3.7 起,字典保证保持插入顺序。

-

递增开发(Incremental Development)

+

递增开发(Incremental Development)

每次完成一小点。从易到难。

练习:给定一个字符串,数出每个字母出现的频率。

-
-
-
def histogram(s):
-
-
''' Cannot pass any test cases. '''
-
pass
-
-
-
-
def histogram(s):
-
-
''' Can pass the test case in which s is an empty string. '''
-
d = {}
-
return d
-
-
-
-
def histogram(s):
-
-
''' Can pass the test cases in which all characters in s are unique. '''
-
d = {}
-
for c in s:
-
-
d[c] = 1
-
-
return d
-
-
-
-
def histogram(s):
-
-
''' Can pass all test cases. '''
-
d = {}
-
for c in s:
-
-
if c not in d:
-
-
d[c] = 1
-
-
else:
-
-
d[c] += 1
-
+
+def histogram(s):
+    ''' Cannot pass any test cases. '''
+    pass
+
+def histogram(s):
+    ''' Can pass the test case in which s is an empty string. '''
+    d = {}
+    return d
+
+def histogram(s):
+    ''' Can pass the test cases in which all characters in s are unique. '''
+    d = {}
+    for c in s:
+        d[c] = 1
+    return d
+
+def histogram(s):
+    ''' Can pass all test cases. '''
+    d = {}
+    for c in s:
+        if c not in d:
+            d[c] = 1
+        else:
+            d[c] += 1
+    return d
+
+
+h = histogram('good')
+print(h)
+
+

练习:给定一个字符串,数出每个单词出现的频率。

+

练习:给定一个新闻文本,数出每个单词出现的频率。考虑以下方面,(1)只考虑字典里有的单词。(2)单词周围如有标点符号,要先移除。

+
+# Copyright (C) 2019 Hui Lan
+# The following line fixes SyntaxError: Non-UTF-8 code starting with ...
+# coding=utf8
+
+def file2lst(fname):
+    ''' Return a list where each element is a word from fname. '''
+    L = []
+    f = open(fname)
+    for line in f:
+        line = line.strip()
+        lst = line.split()
+        for x in lst:
+            L.append(x)
+    f.close()
+    return L
+
+
+def lst2dict(lst):
+    ''' Return a dictionary given list lst.  Each key is an element in the lst.
+    The value is always 1.'''
+    d = {}
+    for w in lst:
+        d[w] = 1
+    return d
+
+
+import string
+def remove_punctuation(s):
+    p = ',.:’“”' + string.punctuation
+    t = ''
+    for c in s:
+        if not c in p:
+            t += c
+        elif c == '’': # handle the case such as May’s
+            return t
+    return t
+
+def word_frequency(fname, english_dictionary):
+    ''' Return a dictionary where each key is a word both in the file fname and in
+    the dictionary english_dictionary, and the corresponding value is the frequency
+    of that word.'''
+    d = {}
+    L = file2lst(fname)
+    for x in L:
+        x = remove_punctuation(x.lower())
+        if x in english_dictionary:
+            if not x in d:
+                d[x] = 1
+            else:
+                d[x] += 1
+    return d
+
+
+def sort_by_value(d):
+    ''' Return a sorted list of tuples, each tuple containing a key and a value.
+        Note that the tuples are sorted in descending order of the value.'''
+    import operator
+    lst = sorted(d.items(), key=operator.itemgetter(1), reverse=True)
+    return lst
+
+
+if __name__ == '__main__':
+    ed = lst2dict(file2lst('words.txt')) # from http://greenteapress.com/thinkpython2/code/words.txt
+    d = word_frequency('brexit-news.txt', ed)
+    lst = sort_by_value(d)
+    for x in lst:
+        print('%s (%d)' % (x[0], x[1]))
+
+
+
+

key与value互换

+

注意到在原来的字典中一个value可能对应多个key的值。比如 d = {'a':1, 'b':2, 'c':2} 中,2就对应两个key,'b'与'c'。

+
+def inverse_dictionary(d):
+    d2 = {}
+    for k in d:
+        v = d[k]
+        if not v in d2:
+            d2[v] = [k]
+        else:
+            d2[v].append(k)
+    return d2
+
+
+
+d = {'a':1, 'b':2, 'c':2}
+d2 = inverse_dictionary(d)
+print(d2)
+
+

练习: 用 inverse_dictionary 对上面 d = word_frequency('brexit-news.txt', ed) 产生的 d 进行转化。然后按照单词出现频率从高到低把所有单词都显示出来。每行显示一个频率内的所有单词。

+
+d2 = inverse_dictionary(d)
+for k in sorted(d2.keys(), reverse=True):
+    print('%d %s' % (k, ' '.join(d2[k])))
+
+

练习: 使用 setdefault 方法对上面的 inverse_dictionary 进行简化 (减少行数)。

+
+def inverse_dictionary(d):
+    d2 = {}
+    for k in d:
+        v = d[k]
+        d2.setdefault(v, []).append(k)
+
+    return d2
+
-
return d
+
+

函数

+

函数 unique_words 与 unique_words2 哪个运行速度快?

+
+def unique_words(lst):
+    d = {}
+    for x in lst:
+        d[x] = 1
+    return sorted(d.keys())
+
+def unique_words2(lst):
+    return sorted(list(set(lst)))
+
+
+N = 10000000
+print(unique_words(['hello', 'world', 'am', 'he'] * N))
+print(unique_words2(['hello', 'world', 'am', 'he'] * N))
+
+
+

局部变量

+

在函数之内。函数执行结束,局部变量消失。

-
-
h = histogram('good')
-
print(h)
+
+

全局变量

+

全局变量位于函数之外,模块之内。全局变量对模块内的所有函数可见(可读)。如果在函数内要对全局变量重新赋值,那么要先用 global 声明之 (declare)。

+
+verbose = True
+
+def example1():
+    if verbose:
+        print('Running example1')
+
+def example2():
+    verbose = False  # a NEW local variable verbose
+    if verbose:
+        print('Running example2')
+
+def example3():
+    global verbose # I am actually going to use the global variable verbose; don't create a local one.
+    verbose = False
+    if verbose:
+        print('Running example3')
+
+
+
+print(verbose)
+example1()
+
+print(verbose)
+example2()
+example1()
+
+print(verbose)
+example3()
+example1()
+
+print(verbose)
+
+

全局的列表与字典,如果只需改变其内容,而不是重新赋值,则不需要用 global 声明。

+
+record = {'s1':65, 's2':60}
+
+def add_score(student, score):
+    record[student] = score
+
+
+print(record)
+add_score('s3', 75)
+print(record)
+
+

练习: 定义一个函数 empty_dict 清空字典 record。 要求: 不能用 return 语句。 提示: 可以用 pop 方法, 或者直接给 record 赋值 {}

-
-

练习:给定一个字符串,数出每个单词出现的频率。

+
-
-

参考

+
+

参考

diff --git a/LectureNotesOnPython.rst b/LectureNotesOnPython.rst index 8505e95..80e9d5e 100644 --- a/LectureNotesOnPython.rst +++ b/LectureNotesOnPython.rst @@ -263,45 +263,45 @@ a = [1, 2] 可以原地修改a[0] = 2 数与格式化显示 ------------------------- -| x = 3.1415926 - -| print('%4.0f' % (x)) -| print('%4.1f' % (x)) -| print('%4.2f' % (x)) -| print('%4.3f' % (x)) -| print('%4.4f' % (x)) - - -| print('%6.0f' % (x)) -| print('%6.1f' % (x)) -| print('%6.2f' % (x)) -| print('%6.3f' % (x)) -| print('%6.4f' % (x)) - - -| print('%.0f' % (x)) -| print('%.1f' % (x)) -| print('%.2f' % (x)) -| print('%.3f' % (x)) -| print('%.4f' % (x)) -| print('%.5f' % (x)) -| print('%.6f' % (x)) -| print('%.7f' % (x)) -| print('%.8f' % (x)) -| print('%.9f' % (x)) -| print('%.15f' % (x)) -| print('%.16f' % (x)) -| print('%.17f' % (x)) -| print('%.18f' % (x)) - -| print('%4.f' % (x)) -| print('%5.f' % (x)) -| print('%6.f' % (x)) -| print('%7.f' % (x)) -| print('%8.f' % (x)) - -| print('%f' % (x)) - + | x = 3.1415926 + + | print('%4.0f' % (x)) + | print('%4.1f' % (x)) + | print('%4.2f' % (x)) + | print('%4.3f' % (x)) + | print('%4.4f' % (x)) + + + | print('%6.0f' % (x)) + | print('%6.1f' % (x)) + | print('%6.2f' % (x)) + | print('%6.3f' % (x)) + | print('%6.4f' % (x)) + + + | print('%.0f' % (x)) + | print('%.1f' % (x)) + | print('%.2f' % (x)) + | print('%.3f' % (x)) + | print('%.4f' % (x)) + | print('%.5f' % (x)) + | print('%.6f' % (x)) + | print('%.7f' % (x)) + | print('%.8f' % (x)) + | print('%.9f' % (x)) + | print('%.15f' % (x)) + | print('%.16f' % (x)) + | print('%.17f' % (x)) + | print('%.18f' % (x)) + + | print('%4.f' % (x)) + | print('%5.f' % (x)) + | print('%6.f' % (x)) + | print('%7.f' % (x)) + | print('%8.f' % (x)) + + | print('%f' % (x)) + 字符串(Strings) @@ -535,8 +535,8 @@ def f(x): list(map(f, [1,2]])) +filter方法,从几个值中选择符合条件的几个值。 -filter方法,从几个值中选择符合条件的几个值 | def f(x): | if x % 2 == 0: @@ -647,7 +647,45 @@ TDD - Test-driven Development 测试驱动开发。 My favourite。 刺激有挑战性。 帮助厘清需求。 帮助编写代码。 
-推荐使用pytest。如何安装? 使用命令 ``pip install pytest`` +推荐使用pytest。如何安装? 使用命令 ``pip install pytest``。 + +在 ``test_cases.py`` 写如下测试用例。然后在命令行运行: ``python -m pytest test_cases.py`` 。 + +.. code:: python + + # Copyright (c) Hui Lan 2019 + + import random + import string + + def make_password(n): + ''' + Return a string of length n consisting of a combination of + letters, digits and special characters. Note that each password + must have at least one lower case letter, one upper case letter, + one digit and one special charater. Return an empty string if n + is less than 4. + ''' + + if n < 4: + return '' + + password = random.choice(string.ascii_lowercase) + \ + random.choice(string.ascii_uppercase) + \ + random.choice(string.digits) + \ + random.choice(string.punctuation) + \ + ''.join([random.choice(string.ascii_letters + string.digits + string.punctuation) for i in range(n-4)]) + + return ''.join(random.sample(password, n)) # shuffle password then return + + + + + if __name__ == '__main__': + for n in range(0,20): + pwd = make_password(n) + print(pwd) + @@ -697,39 +735,262 @@ item的顺序不可预测,不是按照创建时的顺序。 练习:给定一个字符串,数出每个字母出现的频率。 - | def histogram(s): - | ''' Cannot pass any test cases. ''' - | pass +.. code:: python + + def histogram(s): + ''' Cannot pass any test cases. ''' + pass - | def histogram(s): - | ''' Can pass the test case in which s is an empty string. ''' - | d = {} - | return d + def histogram(s): + ''' Can pass the test case in which s is an empty string. ''' + d = {} + return d - | def histogram(s): - | ''' Can pass the test cases in which all characters in s are unique. ''' - | d = {} - | for c in s: - | d[c] = 1 - | return d + def histogram(s): + ''' Can pass the test cases in which all characters in s are unique. ''' + d = {} + for c in s: + d[c] = 1 + return d - | def histogram(s): - | ''' Can pass all test cases. 
''' - | d = {} - | for c in s: - | if c not in d: - | d[c] = 1 - | else: - | d[c] += 1 - | return d + def histogram(s): + ''' Can pass all test cases. ''' + d = {} + for c in s: + if c not in d: + d[c] = 1 + else: + d[c] += 1 + return d - | h = histogram('good') - | print(h) + h = histogram('good') + print(h) 练习:给定一个字符串,数出每个单词出现的频率。 +练习:给定一个新闻文本,数出每个单词出现的频率。考虑以下方面,(1)只考虑字典里有的单词。(2)单词周围如有标点符号,要先移除。 + +.. code:: python + + # Copyright (C) 2019 Hui Lan + # The following line fixes SyntaxError: Non-UTF-8 code starting with ... + # coding=utf8 + + def file2lst(fname): + ''' Return a list where each element is a word from fname. ''' + L = [] + f = open(fname) + for line in f: + line = line.strip() + lst = line.split() + for x in lst: + L.append(x) + f.close() + return L + + + def lst2dict(lst): + ''' Return a dictionary given list lst. Each key is an element in the lst. + The value is always 1.''' + d = {} + for w in lst: + d[w] = 1 + return d + + + import string + def remove_punctuation(s): + p = ',.:’“”' + string.punctuation + t = '' + for c in s: + if not c in p: + t += c + elif c == '’': # handle the case such as May’s + return t + return t + + def word_frequency(fname, english_dictionary): + ''' Return a dictionary where each key is a word both in the file fname and in + the dictionary english_dictionary, and the corresponding value is the frequency + of that word.''' + d = {} + L = file2lst(fname) + for x in L: + x = remove_punctuation(x.lower()) + if x in english_dictionary: + if not x in d: + d[x] = 1 + else: + d[x] += 1 + return d + + + def sort_by_value(d): + ''' Return a sorted list of tuples, each tuple containing a key and a value. 
+ Note that the tuples are order in descending order of the value.''' + import operator + lst = sorted(d.items(), key=operator.itemgetter(1), reverse=True) + return lst + + + if __name__ == '__main__': + ed = lst2dict(file2lst('words.txt')) # from http://greenteapress.com/thinkpython2/code/words.txt + d = word_frequency('brexit-news.txt', ed) + lst = sort_by_value(d) + for x in lst: + print('%s (%d)' % (x[0], x[1])) + + + +key与value互换 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +注意到在原来的字典中一个value可能对应多个key的值。比如 ``d = {'a':1, 'b':2, 'c':2}`` 中,2就对应两个key,'b'与'c'。 + + +.. code:: python + + def inverse_dictionary(d): + d2 = {} + for k in d: + v = d[k] + if not v in d2: + d2[v] = [k] + else: + d2[v].append(k) + return d2 + + + + d = {'a':1, 'b':2, 'c':2} + d2 = inverse_dictionary(d) + print(d2) + + +练习: 用 ``inverse_dictionary`` 对上面 ``d = word_frequency('brexit-news.txt', ed)`` 产生的 ``d`` 进行转化。然后按照单词出现频率从高到低把所有单词都显示出来。每行显示一个频率内的所有单词。 + + +.. code:: python + + d2 = inverse_dictionary(d) + for k in sorted(d2.keys(), reverse=True): + print('%d %s' % (k, ' '.join(d2[k]))) + + +练习: 使用 ``setdefault`` 方法对上面的 ``inverse_dictionary`` 进行简化 (减少行数)。 + + +.. code:: python + + + + def inverse_dictionary(d): + d2 = {} + for k in d: + v = d[k] + d2.setdefault(v, []).append(k) + + return d2 + + + + +函数 +------ + +函数 ``unique_words`` 与 ``unique_words2`` 哪个运行速度快? + +.. code:: python + + def unique_words(lst): + d = {} + for x in lst: + d[x] = 1 + return sorted(d.keys()) + + def unique_words2(lst): + return sorted(list(set(lst))) + + + N = 10000000 + print(unique_words(['hello', 'world', 'am', 'he'] * N)) + print(unique_words2(['hello', 'world', 'am', 'he'] * N)) + + + +局部变量 +~~~~~~~~~~~~~~~~ + +在函数之内。函数执行结束,局部变量消失。 + + +全局变量 +~~~~~~~~~~~~~~~~ + +全局变量位于函数之外,模块之内。全局变量对所有模块内的函数可见(可读)。如果在函数内要对全局变量重新赋值,那么要先用 ``global`` 声明之 (declare)。 + + +.. 
code:: python + + verbose = True + + def example1(): + if verbose: + print('Running example1') + + def example2(): + verbose = False # a NEW local variable verbose + if verbose: + print('Running example2') + + def example3(): + global verbose # I am actually going to use the global variable verbose; don't create a local one. + verbose = False + if verbose: + print('Running example3') + + + + print(verbose) + example1() + + print(verbose) + example2() + example1() + + print(verbose) + example3() + example1() + + print(verbose) + + + +全局的列表与字典,如果只需改变其内容,而不是重新赋值,则不需要用 ``global`` 声明。 + + +.. code:: python + + record = {'s1':65, 's2':60} + + def add_score(student, score): + record[student] = score + + + print(record) + add_score('s3', 75) + print(record) + + +练习: 定义一个函数 ``empty_dict`` 清空字典 ``record``。 要求: 不能用 ``return`` 语句。 提示: 可以用 ``pop`` 方法, 或者直接给 ``record`` 赋值 ``{}`` 。 + + +函数执行顺序 +~~~~~~~~~~~~~~~~~~~~~~~~~ + + 参考 ------ -- cgit v1.2.1