通过self来增删改查文本文档的内容
见2021 week12 task1
建立字典来承载文本文档的内容,此段代码要放在dictionary.py文件里。
先从hash_table文件引入已经实现好的LinearProbeHashTable,同时从Python内置的typing库里引入Tuple,再引入timeit用来计时。
-
from hash_table import LinearProbeHashTable
-
from typing import Tuple
-
import timeit
-
-
-
class Dictionary:
    """Set of words stored as keys of a linear-probing hash table.

    Every word is stored with the placeholder value ``1``; only key
    membership is ever consulted by this class.
    """

    DEFAULT_ENCODING = 'utf-8'

    def __init__(self, hash_base: int, table_size: int) -> None:
        """Create an empty dictionary.

        Both arguments are forwarded unchanged to ``LinearProbeHashTable``.
        """
        self.hash_table = LinearProbeHashTable(hash_base, table_size)

    def load_dictionary(self, filename: str, time_limit: int = None) -> int:
        """Insert every line of ``filename`` as a word.

        Each line is stripped of surrounding whitespace and stored as-is.
        NOTE(review): case is preserved here, while add_word/find_word/
        delete_word lowercase their argument, so a capitalised entry in the
        file cannot be found through find_word -- confirm this is intended.

        :param filename: path of a text file in DEFAULT_ENCODING, one word
            per line.
        :param time_limit: optional budget in seconds; ``None`` disables the
            check.
        :return: the number of lines inserted.
        :raises TimeoutError: when the elapsed time exceeds ``time_limit``
            (checked after each insertion).
        :raises FileNotFoundError: when ``filename`` cannot be opened.
        """
        start_time = timeit.default_timer()
        words = 0
        with open(filename, 'r', encoding=Dictionary.DEFAULT_ENCODING) as file:
            for line in file:
                self.hash_table[line.strip()] = 1
                if time_limit is not None and timeit.default_timer() - start_time > time_limit:
                    raise TimeoutError("Exceeded time limit: " + str(time_limit))
                # Counted only after the time check, so a word whose insertion
                # overran the budget is not reported.
                words += 1
        return words

    def add_word(self, word: str) -> None:
        """Store the lowercased ``word`` in the table."""
        self.hash_table[word.lower()] = 1

    def find_word(self, word: str) -> bool:
        """Return whether the lowercased ``word`` is a key of the table."""
        return word.lower() in self.hash_table

    def delete_word(self, word: str) -> None:
        """Remove the lowercased ``word`` from the table.

        Whatever the table raises for a missing key propagates to the caller.
        """
        del self.hash_table[word.lower()]
-
-
-
def process_option(dictionary: Dictionary, method_name: str) -> None:
    """Prompt for the input one menu action needs, run it and print the result.

    :param dictionary: the dictionary the action operates on.
    :param method_name: 'read_file', 'add_word', 'find_word' or
        'delete_word'. Any other value still prompts for a word but then
        does nothing.
    """
    if method_name == 'read_file':
        filename = input('Enter filename: ')
        try:
            dictionary.load_dictionary(filename)
            print('Successfully read file')
        except FileNotFoundError as e:
            print(e)
    else:
        word = input('Enter word: ')
        if method_name == 'add_word':
            # The word is added exactly once, inside the try, so that an
            # IndexError from the table is reported instead of escaping.
            try:
                dictionary.add_word(word)
                print('[{}] {}'.format(word, 'Successfully added'))
            except IndexError as e:
                print('[{}] {}'.format(word, e))
        elif method_name == 'find_word':
            if dictionary.find_word(word):
                print('[{}] {}'.format(word, 'Found in dictionary'))
            else:
                print('[{}] {}'.format(word, 'Not found in dictionary'))
        elif method_name == 'delete_word':
            try:
                dictionary.delete_word(word)
                print('[{}] {}'.format(word, 'Deleted from dictionary'))
            except KeyError:
                print('[{}] {}'.format(word, 'Not found in dictionary'))
-
-
-
def menu(dictionary: Dictionary):
    """Run the interactive text menu until the user picks the exit entry.

    The menu is printed before every prompt. Input that is not an integer,
    or is outside 1..exit option, is reported and the prompt repeats; any
    other choice except exit is handed to process_option by its key.

    :param dictionary: the dictionary passed on to process_option.
    """
    option = None
    menu_options = {'read_file': 'Read File',
                    'add_word': 'Add Word',
                    'find_word': 'Find Word',
                    'delete_word': 'Delete Word',
                    'exit': 'Exit'}
    option_keys = list(menu_options)

    # Options are shown 1-based, hence the + 1 on the list index.
    exit_option = option_keys.index('exit') + 1

    while option != exit_option:
        print('---------------------')
        for opt, menu_option in enumerate(menu_options.values(), start=1):
            print('{}. {}'.format(opt, menu_option))
        print('---------------------')
        try:
            option = int(input("Enter option: "))
            if option < 1 or option > exit_option:
                raise ValueError('Option must be between 1 and ' + str(exit_option))
        except ValueError as e:
            print('[{}] {}'.format('menu', e))
        else:
            if option != exit_option:
                process_option(dictionary, option_keys[option - 1])
    print("---------------------")
-
-
-
if __name__ == '__main__':
    # Script entry point: hash base 31, table size 250727.
    dictionary = Dictionary(31, 250727)
    menu(dictionary)
将此段代码放在frequency.py文件里:
-
from enum import Enum
-
from string import punctuation
-
from dictionary import Dictionary
-
from hash_table import LinearProbeHashTable
-
-
-
class Rarity(Enum):
    """Frequency band of a word, as returned by Frequency.rarity."""

    COMMON = 0
    UNCOMMON = 1
    RARE = 2
    MISSPELT = 3


class Frequency:
    """Occurrence counts of dictionary words accumulated over text files.

    ``hash_table`` maps a lowercased word to its count and ``max_word`` is
    the ``(word, count)`` pair with the highest count seen so far.
    """

    def __init__(self) -> None:
        """Create empty counts and load the reference word list.

        Reads 'english_large.txt' from the working directory with a time
        limit of 10 passed to Dictionary.load_dictionary.
        """
        self.hash_base = 27183
        self.table_size = 250727
        self.hash_table = LinearProbeHashTable(self.hash_base, self.table_size)
        self.dictionary = Dictionary(self.hash_base, self.table_size)
        self.dictionary.load_dictionary('english_large.txt', 10)
        self.max_word = ('', 0)

    def add_file(self, filename: str) -> None:
        """Add the words of ``filename`` to the running counts.

        The text is split on whitespace; each token has leading and trailing
        punctuation removed and is lowercased. Only tokens that
        ``self.dictionary`` recognises are counted.

        :param filename: path of a UTF-8 text file.
        """
        with open(filename, mode='r', encoding='utf-8') as f:
            tokens = f.read().split()

        for token in tokens:
            word = token.strip(punctuation).lower()
            # A token made only of punctuation strips down to '' -- not a word.
            if not word or not self.dictionary.find_word(word):
                continue
            count = self.hash_table[word] + 1 if word in self.hash_table else 1
            self.hash_table[word] = count
            # Checked on first occurrences too, so max_word is also correct
            # when no word repeats.
            if count > self.max_word[1]:
                self.max_word = (word, count)

    def rarity(self, word: str) -> Rarity:
        """Classify ``word`` against the most frequent word seen so far.

        With ``m`` the highest count: COMMON when the count is at least
        ``max(m / 100, 1)``, UNCOMMON when at least ``max(m / 1000, 1)``,
        RARE for any other non-zero count, and MISSPELT when the word has
        never been counted.

        :param word: word to classify; lowercased before lookup because the
            counts are keyed by lowercased words.
        """
        word = word.lower()
        # Membership is tested first so an unseen word yields MISSPELT
        # instead of whatever the table raises for a missing key.
        if word not in self.hash_table:
            return Rarity.MISSPELT

        cnt = self.hash_table[word]
        highest = self.max_word[1]
        if cnt >= max(highest / 100, 1):
            return Rarity.COMMON
        if cnt >= max(highest / 1000, 1):
            return Rarity.UNCOMMON
        if cnt != 0:
            return Rarity.RARE
        return Rarity.MISSPELT
-
-
-
def frequency_analysis() -> None:
    """Entry point of the frequency analysis; currently a placeholder."""
    # TODO
    pass
-
-
-
if __name__ == '__main__':
    # Script entry point.
    frequency_analysis()
有几个点需要注意:#TODO部分是根据要求新添加的功能,实现之后要去掉其中占位用的语句,例如raise NotImplementedError。
self.hash_base = 27183
self.table_size = 250727
定义好hash_base和table_size的大小。
定义__init__后,实例化时新建的实例本身会连同调用时给出的参数一并传给__init__函数,并自动执行它(一般形式为ClassName(arg1);本文的Frequency.__init__除self外不接收其他参数,所以直接写Frequency()即可)。因此__init__函数的参数列表会在开头多出一项,它永远指代新建的那个实例对象;Python语法要求这个参数必须存在,名称可以随意,习惯上命名为self。
接下来要使用已经定义好的hash_table.py、list_adt.py和referential_array.py。
用test_frequency.py文件来测试字典对文本文档内容的承载以及修改是否成功,通过传入不同的参数来检验各个方法的实现是否正确。
-
"""Unit Testing for Task 1 and 2"""
-
-
import unittest
-
import sys
-
from hash_table import LinearProbeHashTable
-
from frequency import Frequency, Rarity
-
-
-
class TestFrequency(unittest.TestCase):
    """Tests for Frequency.

    They read 'english_large.txt' (through Frequency()), '215-0.txt' and
    '84-0.txt' from the working directory.
    """

    def setUp(self) -> None:
        # A fresh instance per test: add_file mutates the counts.
        self.frequency = Frequency()

    def test_init(self) -> None:
        """The constructor builds the count table and loads the word list."""
        self.assertIsInstance(self.frequency.hash_table, LinearProbeHashTable)
        self.assertTrue(self.frequency.dictionary.find_word('test'))

    def test_add_file(self) -> None:
        """Counts accumulate across successive files."""
        self.frequency.add_file('215-0.txt')
        self.assertEqual(self.frequency.hash_table['warm'], 2)
        self.frequency.add_file('84-0.txt')
        self.assertEqual(self.frequency.hash_table['warm'], 11)

    def test_rarity(self) -> None:
        """Rarity bands are derived from the counts of one file."""
        self.frequency.add_file('215-0.txt')
        self.assertEqual(self.frequency.rarity('warm'), Rarity.UNCOMMON)
        self.assertEqual(self.frequency.rarity('the'), Rarity.COMMON)
-
-
-
if __name__ == '__main__':
    # Run the test cases of this module.
    unittest.main()
-
这篇好文章是转载于:学新通技术网
- 版权申明: 本站部分内容来自互联网,仅供学习及演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,请提供相关证据及您的身份证明,我们将在收到邮件后48小时内删除。
- 本站站名: 学新通技术网
- 本文地址: /boutique/detail/tanhgagakf
系列文章
更多
同类精品
更多
-
photoshop保存的图片太大微信发不了怎么办
PHP中文网 06-15 -
Android 11 保存文件到外部存储,并分享文件
Luke 10-12 -
word里面弄一个表格后上面的标题会跑到下面怎么办
PHP中文网 06-20 -
《学习通》视频自动暂停处理方法
HelloWorld317 07-05 -
photoshop扩展功能面板显示灰色怎么办
PHP中文网 06-14 -
微信公众号没有声音提示怎么办
PHP中文网 03-31 -
excel下划线不显示怎么办
PHP中文网 06-23 -
怎样阻止微信小程序自动打开
PHP中文网 06-13 -
excel打印预览压线压字怎么办
PHP中文网 06-22 -
photoshop蒙版画笔没反应怎么办
PHP中文网 06-24