本文整理汇总了 Python 中 dictionary.Dictionary.get_frequency 方法的典型用法代码示例。如果您正苦于以下问题：Python Dictionary.get_frequency 方法的具体用法？Python Dictionary.get_frequency 怎么用？Python Dictionary.get_frequency 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 dictionary.Dictionary 的用法示例。
在下文中一共展示了 Dictionary.get_frequency 方法的 4 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_dictionary_add_term
# 需要导入模块: from dictionary import Dictionary [as 别名]
# 或者: from dictionary.Dictionary import get_frequency [as 别名]
def test_dictionary_add_term():
    """Check frequency/head/tail bookkeeping of ``Dictionary.add_term``.

    Four ``add_term(term, doc_id, pointer)`` calls are made; after each one
    the frequency, head pointer and tail pointer of the affected term are
    compared with the expected values. The last call repeats an existing
    ``('asdf', 2)`` pair and is expected to leave the term's state untouched.
    """
    index = Dictionary()

    def expect_state(term, frequency, head, tail):
        # Same three checks, in the same order, after every insertion.
        assert_eq(frequency, index.get_frequency(term))
        assert_eq(head, index.get_head(term))
        assert_eq(tail, index.get_tail(term))

    first_pointer, second_pointer = 10, 20
    third_pointer, fourth_pointer = 30, 40

    # First posting for 'asdf': head and tail are the same pointer.
    index.add_term('asdf', 1, first_pointer)
    expect_state('asdf', 1, first_pointer, first_pointer)

    # Second document for 'asdf': the tail moves, the head stays.
    index.add_term('asdf', 2, second_pointer)
    expect_state('asdf', 2, first_pointer, second_pointer)

    # A different term gets its own independent state.
    index.add_term('qwer', 2, third_pointer)
    expect_state('qwer', 1, third_pointer, third_pointer)

    # Re-adding ('asdf', 2) with a new pointer must not change anything.
    index.add_term('asdf', 2, fourth_pointer)
    expect_state('asdf', 2, first_pointer, second_pointer)
示例2: test_dictionary_add_term_pointers
# 需要导入模块: from dictionary import Dictionary [as 别名]
# 或者: from dictionary.Dictionary import get_frequency [as 别名]
def test_dictionary_add_term_pointers():
    """Check head/tail tracking when the first pointer is ``0``.

    Adds the term ``'asdf'`` for two documents and verifies after each
    insertion that the head stays at the first pointer while the tail follows
    the most recent one.
    """
    index = Dictionary()
    head_pointer = 0  # NOTE(review): presumably chosen because 0 is falsy
    tail_pointer = 10

    def expect_state(frequency, head, tail):
        # Frequency first, then head, then tail -- one assertion each.
        assert_eq(frequency, index.get_frequency('asdf'))
        assert_eq(head, index.get_head('asdf'))
        assert_eq(tail, index.get_tail('asdf'))

    index.add_term('asdf', 1, head_pointer)
    expect_state(1, head_pointer, head_pointer)

    index.add_term('asdf', 2, tail_pointer)
    expect_state(2, head_pointer, tail_pointer)
示例3: build
# 需要导入模块: from dictionary import Dictionary [as 别名]
# 或者: from dictionary.Dictionary import get_frequency [as 别名]
def build(training_dir, dict_file, postings_file):
    """Index the documents in ``training_dir``.

    Every file in ``training_dir`` is treated as one document whose file name
    is its integer doc_id. Documents are processed in ascending doc_id order.
    For each term occurrence returned by ``process_file``:

    * if the ``(term, doc_id)`` pair is new, the term's current tail entry (if
      any) is re-written with ``next_pointer`` set to the postings file's
      current pointer, and a fresh entry for ``doc_id`` is written;
    * the term's tail entry then has its ``term_freq`` incremented by one.

    Finally the dictionary is serialised with ``to_json()`` into ``dict_file``.

    Args:
        training_dir: Directory containing one file per document.
        dict_file: Path the JSON dictionary is written to.
        postings_file: Path handed to ``PostingsFile`` (opened with ``'w+'``).

    Raises:
        ValueError: If a file name in ``training_dir`` is not an integer.
    """
    def doc_id_of(path):
        # TODO(michael): Making assumption that document is an int.
        return int(os.path.basename(path))

    index = Dictionary()

    # Gather every file in the training dir, ordered by doc_id.
    doc_paths = sorted(
        (os.path.join(training_dir, name)
         for name in os.listdir(training_dir)),
        key=doc_id_of)

    with PostingsFile(postings_file, mode='w+',
                      entry_cls=PostingsFileEntryWithFrequencies) as postings:
        for path in doc_paths:
            doc_id = doc_id_of(path)

            for term in process_file(path):
                # First time this term is seen in this document: append a
                # new postings entry for the `(term, doc_id)` pair.
                if not index.has_entry(term, doc_id):
                    # The term already has postings, so link its current
                    # tail entry to the entry we are about to add.
                    if index.get_frequency(term) != 0:
                        tail_location = index.get_tail(term)
                        tail_entry = postings.get_entry(tail_location)
                        tail_entry.next_pointer = postings.pointer
                        postings.write_entry(tail_entry)

                    index.add_term(term, doc_id, postings.pointer)
                    postings.write_entry(
                        PostingsFileEntryWithFrequencies(doc_id))

                # Count this occurrence on the entry for the current document.
                # NOTE(michael): We can safely use the tail pointer since we
                # process documents in order and not at random.
                entry_location = index.get_tail(term)
                entry = postings.get_entry(entry_location)
                entry.term_freq += 1
                postings.write_entry(entry)

    # Persist the in-memory dictionary as JSON.
    with open(dict_file, 'w') as out:
        out.write(index.to_json())
示例4: test_dictionary_to_json_from_json
# 需要导入模块: from dictionary import Dictionary [as 别名]
# 或者: from dictionary.Dictionary import get_frequency [as 别名]
def test_dictionary_to_json_from_json():
    """Check that ``to_json`` followed by ``from_json`` round-trips.

    Builds a small dictionary, serialises it, rebuilds a second dictionary
    from the JSON, and compares documents, terms and the per-term frequency,
    head pointer and tail pointer of both instances.
    """
    original = Dictionary()
    for term, doc_id in (('asdf', 1), ('asdf', 2), ('qwer', 1), ('zxcv', 1)):
        original.add_term(term, doc_id, 1)

    restored = Dictionary.from_json(original.to_json())

    assert_eq(restored.all_docs(), original.all_docs())
    assert_eq(restored.all_terms(), original.all_terms())

    # Compare one accessor at a time across all terms: frequency, then head,
    # then tail. The restored value is always the first argument.
    accessor_pairs = (
        (restored.get_frequency, original.get_frequency),
        (restored.get_head, original.get_head),
        (restored.get_tail, original.get_tail),
    )
    for restored_get, original_get in accessor_pairs:
        for term in ('asdf', 'qwer', 'zxcv'):
            assert_eq(restored_get(term), original_get(term))