From 9f3f5b43e1bc79e884091593b9b8f3d6a2164ea5 Mon Sep 17 00:00:00 2001 From: ghaa0920 <1554137355@qq.com> Date: Mon, 15 May 2023 19:15:30 +0800 Subject: [PATCH 1/3] =?UTF-8?q?special=5Fcharacters=20=3D=20'\=5F=C2=A9~+/[]*&$%^@.,=3F!:;#()"=E2=80=9C=E2=80=9D=E2=80=94=E2=80=98?= =?UTF-8?q?=E2=80=99{}|'=20=E7=94=A8=E4=BA=8E=E8=BF=87=E6=BB=A4=E5=AD=97?= =?UTF-8?q?=E7=AC=A6=EF=BC=8C=E6=88=91=E5=B0=86=E5=85=B6=E4=B8=AD=E7=9A=84?= =?UTF-8?q?=E2=80=9C-=E2=80=9D=E5=88=A0=E5=8E=BB=EF=BC=8C=E4=BD=BF?= =?UTF-8?q?=E8=BF=9E=E5=AD=97=E7=AC=A6=E6=B2=A1=E6=9C=89=E8=A2=AB=E8=BF=87?= =?UTF-8?q?=E6=BB=A4=EF=BC=8C=E5=AE=9E=E7=8E=B0=E5=BD=95=E5=85=A5=E4=BE=8B?= =?UTF-8?q?=E5=A6=82fifty-six=E7=AD=89=E7=BB=84=E5=90=88=E8=AF=8D=E7=9A=84?= =?UTF-8?q?=E5=8A=9F=E8=83=BD=E3=80=82=E5=8F=A6=E5=A4=96=E5=AF=B9=E4=BA=8E?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=BF=87=E6=BB=A4=E6=98=AF=E5=90=A6=E4=BC=9A?= =?UTF-8?q?=E5=BC=95=E5=8F=91=E5=AD=97=E7=AC=A6bug=EF=BC=8C=E7=AD=94?= =?UTF-8?q?=E6=A1=88=E6=98=AF=E8=82=AF=E5=AE=9A=E7=9A=84=EF=BC=8C=E4=BD=86?= =?UTF-8?q?=E6=98=AF=E8=BF=99=E6=AE=B5=E4=BB=A3=E7=A0=81=E4=B8=AD=E7=9A=84?= =?UTF-8?q?=E8=BF=87=E6=BB=A4=E5=AD=97=E7=AC=A6=E8=99=BD=E7=84=B6=E5=A4=9A?= =?UTF-8?q?=EF=BC=8C=E4=BD=86=E6=98=AF=E5=B9=B6=E6=B2=A1=E6=9C=89=E5=AE=8C?= =?UTF-8?q?=E5=85=A8=E8=BF=87=E6=BB=A4=E6=8E=89=E6=89=80=E6=9C=89=E5=AD=97?= =?UTF-8?q?=E7=AC=A6=EF=BC=8C=EF=BC=88=E8=BF=87=E6=BB=A4=E7=9A=84=E5=8F=AA?= =?UTF-8?q?=E6=98=AF=E9=94=AE=E7=9B=98=E4=B8=8A=E8=83=BD=E6=89=93=E5=87=BA?= =?UTF-8?q?=E7=9A=84=E5=AD=97=E7=AC=A6=EF=BC=8C=E4=B8=8D=E5=8C=85=E6=8B=AC?= =?UTF-8?q?=E8=BE=93=E5=85=A5=E6=B3=95=E4=B8=AD=E8=83=BD=E6=89=93=E5=87=BA?= =?UTF-8?q?=E7=9A=84=E7=89=B9=E6=AE=8A=E5=AD=97=E7=AC=A6=EF=BC=89=EF=BC=8C?= =?UTF-8?q?=E6=89=80=E4=BB=A5=E5=AD=97=E7=AC=A6bug=E6=9C=AC=E8=BA=AB?= =?UTF-8?q?=E5=B0=B1=E4=B8=80=E7=9B=B4=E5=AD=98=E5=9C=A8=EF=BC=8C=E6=88=91?= =?UTF-8?q?=E8=AE=A4=E4=B8=BA=E5=87=8F=E5=B0=91=E4=B8=80=E4=B8=AA=E5=AF=B9?= =?UTF-8?q?=E2=80=9C-=E2=80=9D=E5=AD=97=E7=AC=A6=E7=9A=84=E8=BF=87?= =?UTF-8?q?=E6=BB=A4=E4=B8=8D=E4=BC=9A=E9=80=A0=E6=88=90=E9=97=AE=E9=A2=98?= =?UTF-8?q?=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/wordfreqCMD.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/wordfreqCMD.py b/app/wordfreqCMD.py index c4f8a63..b112c3b 100644 --- a/app/wordfreqCMD.py +++ b/app/wordfreqCMD.py @@ -39,7 +39,7 @@ def file2str(fname):#文件转字符 def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。 - special_characters = '\_©~<=>+-/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉 + special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉 for c in special_characters: s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况 s = s.replace('--', ' ') From acd8db6e3e1c498650f2e8cc3021828e6132e823 Mon Sep 17 00:00:00 2001 From: ghaa0920 <1554137355@qq.com> Date: Mon, 15 May 2023 19:24:43 +0800 Subject: [PATCH 2/3] =?UTF-8?q?special=5Fcharacters=20=3D=20'\=5F=C2=A9~+/[]*&$%^@.,=3F!:;#()"=E2=80=9C=E2=80=9D=E2=80=94=E2=80=98?= =?UTF-8?q?=E2=80=99{}|'=20=E7=94=A8=E4=BA=8E=E8=BF=87=E6=BB=A4=E5=AD=97?= =?UTF-8?q?=E7=AC=A6=EF=BC=8C=E6=88=91=E5=B0=86=E5=85=B6=E4=B8=AD=E7=9A=84?= =?UTF-8?q?=E2=80=9C-=E2=80=9D=E5=88=A0=E5=8E=BB=EF=BC=8C=E4=BD=BF?= =?UTF-8?q?=E8=BF=9E=E5=AD=97=E7=AC=A6=E6=B2=A1=E6=9C=89=E8=A2=AB=E8=BF=87?= =?UTF-8?q?=E6=BB=A4=EF=BC=8C=E5=AE=9E=E7=8E=B0=E5=BD=95=E5=85=A5=E4=BE=8B?= =?UTF-8?q?=E5=A6=82fifty-six=E7=AD=89=E7=BB=84=E5=90=88=E8=AF=8D=E7=9A=84?= =?UTF-8?q?=E5=8A=9F=E8=83=BD=E3=80=82=E5=8F=A6=E5=A4=96=E5=AF=B9=E4=BA=8E?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=BF=87=E6=BB=A4=E6=98=AF=E5=90=A6=E4=BC=9A?= =?UTF-8?q?=E5=BC=95=E5=8F=91=E5=AD=97=E7=AC=A6bug=EF=BC=8C=E7=AD=94?= =?UTF-8?q?=E6=A1=88=E6=98=AF=E8=82=AF=E5=AE=9A=E7=9A=84=EF=BC=8C=E4=BD=86?= =?UTF-8?q?=E6=98=AF=E8=BF=99=E6=AE=B5=E4=BB=A3=E7=A0=81=E4=B8=AD=E7=9A=84?= =?UTF-8?q?=E8=BF=87=E6=BB=A4=E5=AD=97=E7=AC=A6=E8=99=BD=E7=84=B6=E5=A4=9A?= =?UTF-8?q?=EF=BC=8C=E4=BD=86=E6=98=AF=E5=B9=B6=E6=B2=A1=E6=9C=89=E5=AE=8C?= =?UTF-8?q?=E5=85=A8=E8=BF=87=E6=BB=A4=E6=8E=89=E6=89=80=E6=9C=89=E5=AD=97?= =?UTF-8?q?=E7=AC=A6=EF=BC=8C=EF=BC=88=E8=BF=87=E6=BB=A4=E7=9A=84=E5=8F=AA?= =?UTF-8?q?=E6=98=AF=E9=94=AE=E7=9B=98=E4=B8=8A=E8=83=BD=E6=89=93=E5=87=BA?= =?UTF-8?q?=E7=9A=84=E5=AD=97=E7=AC=A6=EF=BC=8C=E4=B8=8D=E5=8C=85=E6=8B=AC?= =?UTF-8?q?=E8=BE=93=E5=85=A5=E6=B3=95=E4=B8=AD=E8=83=BD=E6=89=93=E5=87=BA?= =?UTF-8?q?=E7=9A=84=E7=89=B9=E6=AE=8A=E5=AD=97=E7=AC=A6=EF=BC=89=EF=BC=8C?= =?UTF-8?q?=E6=89=80=E4=BB=A5=E5=AD=97=E7=AC=A6bug=E6=9C=AC=E8=BA=AB?= =?UTF-8?q?=E5=B0=B1=E4=B8=80=E7=9B=B4=E5=AD=98=E5=9C=A8=EF=BC=8C=E6=88=91?= =?UTF-8?q?=E8=AE=A4=E4=B8=BA=E5=87=8F=E5=B0=91=E4=B8=80=E4=B8=AA=E5=AF=B9?= =?UTF-8?q?=E2=80=9C1-=E2=80=9D=E5=AD=97=E7=AC=A6=E7=9A=84=E8=BF=87?= =?UTF-8?q?=E6=BB=A4=E4=B8=8D=E4=BC=9A=E9=80=A0=E6=88=90=E9=97=AE=E9=A2=98?= =?UTF-8?q?=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/wordfreqCMD.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/wordfreqCMD.py b/app/wordfreqCMD.py index b112c3b..fb67873 100644 --- a/app/wordfreqCMD.py +++ b/app/wordfreqCMD.py @@ -39,7 +39,7 @@ def file2str(fname):#文件转字符 def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。 - special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉 + special_characters = '\_©~<=>-+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉 for c in special_characters: s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况 s = s.replace('--', ' ') From 030b89706e51a35d95eb6eafa7ae2b2fefac821b Mon Sep 17 00:00:00 2001 From: ghaa0920 <1554137355@qq.com> Date: Sat, 20 May 2023 15:29:12 +0800 Subject: [PATCH 3/3] =?UTF-8?q?special=5Fcharacters=20=3D=20'\=5F=C2=A9~+/[]*&$%^@.,=3F!:;#()"=E2=80=9C=E2=80=9D=E2=80=94=E2=80=98?= =?UTF-8?q?=E2=80=99{}|'=20=E7=94=A8=E4=BA=8E=E8=BF=87=E6=BB=A4=E5=AD=97?= =?UTF-8?q?=E7=AC=A6=EF=BC=8C=E6=88=91=E5=B0=86=E5=85=B6=E4=B8=AD=E7=9A=84?= =?UTF-8?q?=E2=80=9C-=E2=80=9D=E5=88=A0=E5=8E=BB=EF=BC=8C=E4=BD=BF?= =?UTF-8?q?=E8=BF=9E=E5=AD=97=E7=AC=A6=E6=B2=A1=E6=9C=89=E8=A2=AB=E8=BF=87?= =?UTF-8?q?=E6=BB=A4=EF=BC=8C=E5=AE=9E=E7=8E=B0=E5=BD=95=E5=85=A5=E4=BE=8B?= =?UTF-8?q?=E5=A6=82fifty-six=E7=AD=89=E7=BB=84=E5=90=88=E8=AF=8D=E7=9A=84?= =?UTF-8?q?=E5=8A=9F=E8=83=BD=E3=80=82=E5=8F=A6=E5=A4=96=E5=AF=B9=E4=BA=8E?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=BF=87=E6=BB=A4=E6=98=AF=E5=90=A6=E4=BC=9A?= =?UTF-8?q?=E5=BC=95=E5=8F=91=E5=AD=97=E7=AC=A6bug=EF=BC=8C=E7=AD=94?= =?UTF-8?q?=E6=A1=88=E6=98=AF=E8=82=AF=E5=AE=9A=E7=9A=84=EF=BC=8C=E4=BD=86?= =?UTF-8?q?=E6=98=AF=E8=BF=99=E6=AE=B5=E4=BB=A3=E7=A0=81=E4=B8=AD=E7=9A=84?= =?UTF-8?q?=E8=BF=87=E6=BB=A4=E5=AD=97=E7=AC=A6=E8=99=BD=E7=84=B6=E5=A4=9A?= =?UTF-8?q?=EF=BC=8C=E4=BD=86=E6=98=AF=E5=B9=B6=E6=B2=A1=E6=9C=89=E5=AE=8C?= =?UTF-8?q?=E5=85=A8=E8=BF=87=E6=BB=A4=E6=8E=89=E6=89=80=E6=9C=89=E5=AD=97?= =?UTF-8?q?=E7=AC=A6=EF=BC=8C=EF=BC=88=E8=BF=87=E6=BB=A4=E7=9A=84=E5=8F=AA?= =?UTF-8?q?=E6=98=AF=E9=94=AE=E7=9B=98=E4=B8=8A=E8=83=BD=E6=89=93=E5=87=BA?= =?UTF-8?q?=E7=9A=84=E5=AD=97=E7=AC=A6=EF=BC=8C=E4=B8=8D=E5=8C=85=E6=8B=AC?= =?UTF-8?q?=E8=BE=93=E5=85=A5=E6=B3=95=E4=B8=AD=E8=83=BD=E6=89=93=E5=87=BA?= =?UTF-8?q?=E7=9A=84=E7=89=B9=E6=AE=8A=E5=AD=97=E7=AC=A6=EF=BC=89=EF=BC=8C?= =?UTF-8?q?=E6=89=80=E4=BB=A5=E5=AD=97=E7=AC=A6bug=E6=9C=AC=E8=BA=AB?= =?UTF-8?q?=E5=B0=B1=E4=B8=80=E7=9B=B4=E5=AD=98=E5=9C=A8=EF=BC=8C=E6=88=91?= =?UTF-8?q?=E8=AE=A4=E4=B8=BA=E5=87=8F=E5=B0=91=E4=B8=80=E4=B8=AA=E2=80=9C?= =?UTF-8?q?-=E2=80=9D=E5=AD=97=E7=AC=A6=E5=AF=B9=E7=A8=8B=E5=BA=8F?= =?UTF-8?q?=E7=9A=84=E8=BF=87=E6=BB=A4=E8=BF=87=E7=A8=8B=E4=B8=8D=E4=BC=9A?= =?UTF-8?q?=E9=80=A0=E6=88=90=E9=97=AE=E9=A2=98=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/wordfreqCMD.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/wordfreqCMD.py b/app/wordfreqCMD.py index fb67873..b112c3b 100644 --- a/app/wordfreqCMD.py +++ b/app/wordfreqCMD.py @@ -39,7 +39,7 @@ def file2str(fname):#文件转字符 def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。 - special_characters = '\_©~<=>-+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉 + special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉 for c in special_characters: s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况 s = s.replace('--', ' ')