本文整理汇总了 Python 中 dictionary.Dictionary.all_terms 方法的典型用法代码示例。如果您正苦于以下问题：Python Dictionary.all_terms 方法的具体用法？Python Dictionary.all_terms 怎么用？Python Dictionary.all_terms 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 dictionary.Dictionary 的用法示例。
在下文中一共展示了Dictionary.all_terms方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_dictionary_all_terms
# 需要导入模块: from dictionary import Dictionary [as 别名]
# 注意: all_terms 是 Dictionary 类的方法, 不能用 from dictionary.Dictionary import all_terms 单独导入; 请通过实例调用, 例如 Dictionary().all_terms()
def test_dictionary_all_terms():
    """Check that all_terms() reports each distinct term exactly once."""
    index = Dictionary()

    # A freshly constructed dictionary reports no terms.
    assert_eq([], index.all_terms())

    # Adding 'asdf' again with a different second argument must not
    # make the term show up twice.
    for doc_id in (1, 2):
        index.add_term('asdf', doc_id, 1)
        assert_eq(['asdf'], index.all_terms())

    for term in ('qwer', 'zxcv'):
        index.add_term(term, 1, 1)

    # Both sides are sorted, so this check does not depend on the order
    # in which all_terms() returns its items.
    expected = sorted(['asdf', 'qwer', 'zxcv'])
    actual = sorted(index.all_terms())
    assert_eq(expected, actual)
示例2: test_dictionary_to_json_from_json
# 需要导入模块: from dictionary import Dictionary [as 别名]
# 注意: all_terms 是 Dictionary 类的方法, 不能用 from dictionary.Dictionary import all_terms 单独导入; 请通过实例调用, 例如 Dictionary().all_terms()
def test_dictionary_to_json_from_json():
    """Check that a Dictionary survives a to_json()/from_json() round trip."""
    original = Dictionary()
    additions = (('asdf', 1), ('asdf', 2), ('qwer', 1), ('zxcv', 1))
    for term, doc_id in additions:
        original.add_term(term, doc_id, 1)

    restored = Dictionary.from_json(original.to_json())

    assert_eq(restored.all_docs(), original.all_docs())
    # NOTE(review): all_terms() is compared unsorted here, whereas
    # test_dictionary_all_terms sorts both sides -- confirm the ordering
    # is stable across a JSON round trip.
    assert_eq(restored.all_terms(), original.all_terms())

    # Every per-term accessor must agree between the two objects.
    # Check order: all frequencies, then all heads, then all tails.
    for accessor in ('get_frequency', 'get_head', 'get_tail'):
        for term in ('asdf', 'qwer', 'zxcv'):
            assert_eq(
                getattr(restored, accessor)(term),
                getattr(original, accessor)(term))
示例3: build
# 需要导入模块: from dictionary import Dictionary [as 别名]
# 注意: all_terms 是 Dictionary 类的方法, 不能用 from dictionary.Dictionary import all_terms 单独导入; 请通过实例调用, 例如 Dictionary().all_terms()
def build(training_dir, dict_file, postings_file):
    """Index the documents in training_dir into a dictionary and postings.

    training_dir -- directory to list; every entry name must parse as an
        int, and that int is used as the document id.
    dict_file -- path that receives dictionary.to_json() at the end.
    postings_file -- path handed to PostingsFile, opened with mode 'w+'.
    """
    index = Dictionary()

    # Gather every entry of the training dir, ordered by numeric doc id.
    documents = sorted(
        (os.path.join(training_dir, name)
         for name in os.listdir(training_dir)),
        key=lambda path: int(os.path.basename(path)))

    # NOTE(michael): for testing, the document list can be truncated here
    # (e.g. to its first 10 items) to keep the loop below small.

    with PostingsFile(postings_file, mode='w+') as postings:
        for path in documents:
            terms = process_file(path)
            # TODO(michael): Making assumption that document is an int.
            doc_id = int(os.path.basename(path))

            for term in terms:
                # Nothing to do when the dictionary already reports an
                # entry for this (term, doc_id) pair.
                if index.has_entry(term, doc_id):
                    continue

                new_node = postings.pointer

                if index.get_frequency(term) != 0:
                    # Update the previous node in the linked list:
                    # rewrite the current tail in place so that its next
                    # pointer refers to the node about to be written.
                    tail_location = index.get_tail(term)
                    tail_entry = postings.get_entry(tail_location)
                    postings.write_entry(
                        tail_entry.doc_id,
                        new_node,
                        write_location=tail_location)

                index.add_term(term, doc_id, new_node)
                postings.write_entry(doc_id, write_location=new_node)

        # Skip pointers
        for term in index.all_terms():
            frequency = index.get_frequency(term)
            stride = int(math.sqrt(frequency))

            # Don't bother if too low.
            if stride < SKIP_POINTER_THRESHOLD:
                continue

            head = index.get_head(term)
            entries = postings.get_entry_list_from_pointer(head)

            for position in xrange(frequency):
                # Only every stride-th entry carries a skip pointer.
                if position % stride != 0:
                    continue

                target = position + stride
                # Nothing to point to.
                if target >= frequency:
                    continue

                source_entry = entries[position]
                target_entry = entries[target]

                # Rewrite the source entry in place, keeping its doc id
                # and next pointer and adding the skip target's location
                # and doc id.
                postings.write_entry(
                    source_entry.doc_id,
                    source_entry.next_pointer,
                    target_entry.own_pointer,
                    target_entry.doc_id,
                    write_location=source_entry.own_pointer)

    # Write dictionary to file.
    with open(dict_file, 'w') as out:
        out.write(index.to_json())