当前位置: 首页>>代码示例>>Python>>正文


Python Corpus.process方法代码示例

本文整理汇总了Python中corpus.Corpus.process方法的典型用法代码示例。如果您正苦于以下问题：Python Corpus.process方法的具体用法是什么？Python Corpus.process怎么用？有哪些Python Corpus.process的使用例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类corpus.Corpus的用法示例。


在下文中一共展示了Corpus.process方法的2个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: process_projects

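# Required import: from corpus import Corpus [as alias]
# Note: process is a method of Corpus and cannot be imported on its own;
# call it on an instance, e.g. Corpus(src_directory).process()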
def process_projects(src_directory, glossary_description, glossary_file,
                     max_terms=5000):
    """Build the developer and user glossary reports for a source directory.

    Processes the corpus under ``src_directory``, ranks its terms by their
    ``tfxdf`` score (highest first), keeps the top ``max_terms`` of them and
    then:

    * writes the developer report ``dev-<glossary_file>.html``;
    * renders the user glossary through the HTML and CSV mustache templates;
    * hands the glossary to ``generate_database``.

    Args:
        src_directory: Directory passed to ``Corpus``.
        glossary_description: Description passed to the developer report
            serializer and to ``Glossary``.
        glossary_file: Base name of the generated files; the extensions (and
            the ``dev-`` prefix of the developer report) are appended here.
        max_terms: Maximum number of top-ranked terms to include. Defaults
            to 5000, the value that was previously hard-coded.
    """
    corpus = Corpus(src_directory)
    corpus.process()

    reference_sources = ReferenceSources()
    reference_sources.read_sources()

    metrics = Metrics()
    metrics.create(corpus)

    # Select terms: rank by tfxdf (descending) and truncate once, so both
    # reports are built from exactly the same set of terms.
    ranked_terms = sorted(metrics.tfxdf, key=metrics.tfxdf.get, reverse=True)
    top_terms = ranked_terms[:max_terms]

    translations = Translations()

    # Developer report: entries keep the ranking order.
    dev_entries = OrderedDict()
    for term in top_terms:
        dev_entries[term] = translations.create_for_word_sorted_by_frequency(
            corpus.documents, term, reference_sources)

    dev_glossary_serializer = DevGlossarySerializer()
    dev_glossary_serializer.create(u"dev-" + glossary_file + ".html",
                                   glossary_description, corpus,
                                   dev_entries, reference_sources)

    # User report: same terms, ordered by the term itself.
    # NOTE(review): translations are computed again per term (as before)
    # instead of being reused from dev_entries, because it is not visible
    # here whether the returned objects may safely be shared - confirm.
    glossary = Glossary(glossary_description)
    for term in sorted(top_terms):
        term_translations = translations.create_for_word_sorted_by_frequency(
            corpus.documents, term, reference_sources)
        glossary.entries.append(GlossaryEntry(term, term_translations))

    user_entries = glossary.get_dict()
    process_template('templates/userglossary-html.mustache',
                     glossary_file + ".html", user_entries)
    process_template('templates/userglossary-csv.mustache',
                     glossary_file + ".csv", user_entries)

    generate_database(glossary, glossary_file)
开发者ID:txemaq,项目名称:translation-memory-tools,代码行数:51,代码来源:term-extract.py

示例2: process_projects

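# Required import: from corpus import Corpus [as alias]
# Note: process is a method of Corpus and cannot be imported on its own;
# call it on an instance, e.g. Corpus(src_directory).process()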
def process_projects(max_terms=1000):
    """Build the developer and user glossary reports.

    Reads the module-level names ``src_directory``, ``glossary_file`` and
    ``glossary_description`` (they are only read, never rebound, so no
    ``global`` declaration is needed). Processes the corpus under
    ``src_directory``, ranks its terms by their ``tfxdf`` score (highest
    first), keeps the top ``max_terms`` of them and then writes the developer
    report ``dev-<glossary_file>.html`` and the user glossary via
    ``UserGlossarySerializer``.

    Args:
        max_terms: Maximum number of top-ranked terms to include. Defaults
            to 1000, the value that was previously hard-coded.
    """
    corpus = Corpus(src_directory)
    corpus.process()

    reference_sources = ReferenceSources()
    reference_sources.read_sources()

    metrics = Metrics()
    metrics.create(corpus)

    # Select terms: rank by tfxdf (descending) and truncate once, so both
    # reports are built from exactly the same set of terms.
    ranked_terms = sorted(metrics.tfxdf, key=metrics.tfxdf.get, reverse=True)
    top_terms = ranked_terms[:max_terms]

    translations = Translations()

    # Developer report: entries keep the ranking order.
    dev_entries = OrderedDict()
    for term in top_terms:
        dev_entries[term] = translations.create_for_word_sorted_by_frequency(
            corpus.documents, term, reference_sources)

    dev_glossary_serializer = DevGlossarySerializer()
    dev_glossary_serializer.create(u"dev-" + glossary_file + ".html",
                                   glossary_description, corpus,
                                   dev_entries, reference_sources)

    # User report: same terms, ordered by the term itself.
    # NOTE(review): translations are computed again per term (as before)
    # instead of being reused from dev_entries, because it is not visible
    # here whether the returned objects may safely be shared - confirm.
    glossary = Glossary()
    glossary.description = glossary_description
    for term in sorted(top_terms):
        glossary_entry = GlossaryEntry()
        glossary_entry.source_term = term
        glossary_entry.translations = (
            translations.create_for_word_sorted_by_frequency(
                corpus.documents, term, reference_sources))
        glossary.entries.append(glossary_entry)

    user_glossary_serializer = UserGlossarySerializer()
    user_glossary_serializer.create(glossary_file, glossary.get_dict(),
                                    reference_sources)
开发者ID:gforcada,项目名称:translation-memory-tools,代码行数:48,代码来源:term-extract.py


注：本文中的corpus.Corpus.process方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。