当前位置: 首页>>代码示例>>Python>>正文


Python Dictionary.add_term方法代码示例

本文整理汇总了Python中dictionary.Dictionary.add_term方法的典型用法代码示例。如果您正苦于以下问题:Python Dictionary.add_term方法的具体用法?Python Dictionary.add_term怎么用?Python Dictionary.add_term使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在dictionary.Dictionary的用法示例。


在下文中一共展示了Dictionary.add_term方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_dictionary_has_entry

# 需要导入模块: from dictionary import Dictionary [as 别名]
# 或者: from dictionary.Dictionary import add_term [as 别名]
def test_dictionary_has_entry():
    """Check that `has_entry` only reports pairs that were added."""
    dictionary = Dictionary()

    # Nothing has been added yet, so the pair must be absent.
    present_before_add = dictionary.has_entry('asdf', 1)
    assert not present_before_add

    dictionary.add_term('asdf', 1, 10)

    # The added pair is now reported; a different term is still absent.
    assert dictionary.has_entry('asdf', 1)
    assert not dictionary.has_entry('qwer', 1)
开发者ID:kaiserahmed,项目名称:cs3245-hw,代码行数:9,代码来源:test_dictionary.py

示例2: build

# 需要导入模块: from dictionary import Dictionary [as 别名]
# 或者: from dictionary.Dictionary import add_term [as 别名]
def build(training_dir, dict_file, postings_file):
    """Index the files in `training_dir` into a dictionary and postings file.

    Every file name in `training_dir` is parsed as an integer document id and
    the files are processed in ascending id order. For each term returned by
    `process_file`, a postings entry is created the first time the
    `(term, doc_id)` pair is seen, and the `term_freq` of that entry is
    incremented once per returned term. When all files are processed, the
    output of `dictionary.to_json()` is written to `dict_file`.

    Args:
        training_dir: Directory whose file names are integer document ids.
        dict_file: Path the serialized dictionary is written to.
        postings_file: Path handed to `PostingsFile` (opened with `w+`).
    """
    dictionary = Dictionary()

    # Gather every file in the training dir, ordered by doc_id.
    filepaths = sorted(
        (os.path.join(training_dir, name)
         for name in os.listdir(training_dir)),
        key=lambda path: int(os.path.basename(path)))

    # NOTE(michael): for testing, `filepaths` can be sliced here to limit the
    # number of documents that get indexed.

    with PostingsFile(postings_file, mode='w+',
            entry_cls=PostingsFileEntryWithFrequencies) as postings:
        for filepath in filepaths:
            # TODO(michael): Making assumption that document is an int.
            doc_id = int(os.path.basename(filepath))
            for term in process_file(filepath):
                # First sighting of this `(term, doc_id)` pair: append a new
                # postings entry for it.
                if not dictionary.has_entry(term, doc_id):
                    # If the term already has entries, link the current tail
                    # entry to the location where the new entry is about to
                    # be written.
                    if dictionary.get_frequency(term) != 0:
                        tail_location = dictionary.get_tail(term)
                        tail_entry = postings.get_entry(tail_location)
                        tail_entry.next_pointer = postings.pointer
                        postings.write_entry(tail_entry)

                    # Record the new entry's location before writing it.
                    dictionary.add_term(term, doc_id, postings.pointer)
                    postings.write_entry(
                        PostingsFileEntryWithFrequencies(doc_id))

                # Increment the term frequency on the entry for this pair.
                # NOTE(michael): We can safely use the tail pointer since we
                # process documents in order and not at random.
                entry = postings.get_entry(dictionary.get_tail(term))
                entry.term_freq += 1
                postings.write_entry(entry)

    # Write dictionary to file.
    with open(dict_file, 'w') as dictionary_file:
        dictionary_file.write(dictionary.to_json())
开发者ID:kaiserahmed,项目名称:cs3245-hw,代码行数:55,代码来源:build_index.py

示例3: test_dictionary_add_term

# 需要导入模块: from dictionary import Dictionary [as 别名]
# 或者: from dictionary.Dictionary import add_term [as 别名]
def test_dictionary_add_term():
    """Check frequency, head and tail bookkeeping across `add_term` calls."""
    dictionary = Dictionary()

    def expect(term, frequency, head, tail):
        # The same three checks, in the same order, after every insertion.
        assert_eq(frequency, dictionary.get_frequency(term))
        assert_eq(head, dictionary.get_head(term))
        assert_eq(tail, dictionary.get_tail(term))

    first_pointer, next_pointer, third_pointer, fourth_pointer = 10, 20, 30, 40

    # First document for a term: head and tail are the same pointer.
    dictionary.add_term('asdf', 1, first_pointer)
    expect('asdf', 1, first_pointer, first_pointer)

    # Second document for the same term: only the tail moves.
    dictionary.add_term('asdf', 2, next_pointer)
    expect('asdf', 2, first_pointer, next_pointer)

    # A different term is tracked independently.
    dictionary.add_term('qwer', 2, third_pointer)
    expect('qwer', 1, third_pointer, third_pointer)

    # Adding an already-known `(term, doc_id)` pair changes nothing.
    dictionary.add_term('asdf', 2, fourth_pointer)
    expect('asdf', 2, first_pointer, next_pointer)
开发者ID:kaiserahmed,项目名称:cs3245-hw,代码行数:28,代码来源:test_dictionary.py

示例4: test_dictionary_add_term_pointers

# 需要导入模块: from dictionary import Dictionary [as 别名]
# 或者: from dictionary.Dictionary import add_term [as 别名]
def test_dictionary_add_term_pointers():
    """Check head/tail tracking when the first pointer is 0."""
    dictionary = Dictionary()
    term = 'asdf'

    # NOTE(review): the 0 pointer presumably exercises a falsy location
    # value; confirm against the `Dictionary` implementation.
    head_pointer = 0
    dictionary.add_term(term, 1, head_pointer)
    assert_eq(1, dictionary.get_frequency(term))
    assert_eq(head_pointer, dictionary.get_head(term))
    assert_eq(head_pointer, dictionary.get_tail(term))

    # A second document keeps the head and advances the tail.
    tail_pointer = 10
    dictionary.add_term(term, 2, tail_pointer)
    assert_eq(2, dictionary.get_frequency(term))
    assert_eq(head_pointer, dictionary.get_head(term))
    assert_eq(tail_pointer, dictionary.get_tail(term))
开发者ID:kaiserahmed,项目名称:cs3245-hw,代码行数:16,代码来源:test_dictionary.py

示例5: test_dictionary_to_json_from_json

# 需要导入模块: from dictionary import Dictionary [as 别名]
# 或者: from dictionary.Dictionary import add_term [as 别名]
def test_dictionary_to_json_from_json():
    """Check that a `to_json` / `from_json` round trip preserves the data."""
    original = Dictionary()
    for term, doc_id in (('asdf', 1), ('asdf', 2), ('qwer', 1), ('zxcv', 1)):
        original.add_term(term, doc_id, 1)

    restored = Dictionary.from_json(original.to_json())

    # Document and term listings must match.
    assert_eq(restored.all_docs(), original.all_docs())
    assert_eq(restored.all_terms(), original.all_terms())

    # Per-term statistics must match: frequencies, then heads, then tails.
    terms = ('asdf', 'qwer', 'zxcv')
    for term in terms:
        assert_eq(restored.get_frequency(term), original.get_frequency(term))
    for term in terms:
        assert_eq(restored.get_head(term), original.get_head(term))
    for term in terms:
        assert_eq(restored.get_tail(term), original.get_tail(term))
开发者ID:kaiserahmed,项目名称:cs3245-hw,代码行数:24,代码来源:test_dictionary.py

示例6: test_dictionary_all_docs

# 需要导入模块: from dictionary import Dictionary [as 别名]
# 或者: from dictionary.Dictionary import add_term [as 别名]
def test_dictionary_all_docs():
    """Check that `all_docs` lists each added document id exactly once."""
    dictionary = Dictionary()

    # An empty dictionary knows no documents.
    assert_eq([], dictionary.all_docs())

    dictionary.add_term('asdf', 1, 1)
    assert_eq([1], dictionary.all_docs())

    dictionary.add_term('asdf', 2, 1)
    assert_eq([1, 2], dictionary.all_docs())

    # New terms in an already-known document do not add document ids.
    for term in ('qwer', 'zxcv'):
        dictionary.add_term(term, 1, 1)
    assert_eq([1, 2], dictionary.all_docs())
开发者ID:kaiserahmed,项目名称:cs3245-hw,代码行数:15,代码来源:test_dictionary.py

示例7: test_dictionary_all_terms

# 需要导入模块: from dictionary import Dictionary [as 别名]
# 或者: from dictionary.Dictionary import add_term [as 别名]
def test_dictionary_all_terms():
    """Check that `all_terms` lists each added term exactly once."""
    dictionary = Dictionary()

    # An empty dictionary knows no terms.
    assert_eq([], dictionary.all_terms())

    dictionary.add_term('asdf', 1, 1)
    assert_eq(['asdf'], dictionary.all_terms())

    # The same term in another document is not listed twice.
    dictionary.add_term('asdf', 2, 1)
    assert_eq(['asdf'], dictionary.all_terms())

    for term in ('qwer', 'zxcv'):
        dictionary.add_term(term, 1, 1)

    # Compare sorted copies so the check does not depend on listing order.
    expected_terms = sorted(['asdf', 'qwer', 'zxcv'])
    actual_terms = sorted(dictionary.all_terms())
    assert_eq(expected_terms, actual_terms)
开发者ID:kaiserahmed,项目名称:cs3245-hw,代码行数:17,代码来源:test_dictionary.py


注:本文中的dictionary.Dictionary.add_term方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。