本文整理汇总了Python中corpus.Corpus.process方法的典型用法代码示例。如果您正苦于以下问题：Python Corpus.process方法的具体用法？Python Corpus.process怎么用？Python Corpus.process使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类corpus.Corpus的用法示例。
在下文中一共展示了Corpus.process方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: process_projects
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import process [as 别名]
def process_projects(src_directory, glossary_description, glossary_file,
                     max_terms=5000):
    """Build the developer and user glossary outputs for a source directory.

    The corpus under ``src_directory`` is processed, terms are ranked by
    their ``metrics.tfxdf`` score (highest first) and the top ``max_terms``
    of them are written out as:

    * ``dev-<glossary_file>.html`` via ``DevGlossarySerializer`` (terms kept
      in ranking order),
    * ``<glossary_file>.html`` and ``<glossary_file>.csv`` rendered from the
      ``userglossary`` mustache templates (terms sorted alphabetically),
    * whatever ``generate_database`` produces for the glossary.

    Args:
        src_directory: Location handed to ``Corpus``.
        glossary_description: Description passed to the developer report and
            to ``Glossary``.
        glossary_file: Base name (no extension) of the generated files.
        max_terms: Maximum number of top-ranked terms to include. ``None``
            keeps every term. Defaults to 5000, the previously hard-coded cap.

    Raises:
        ValueError: If ``max_terms`` is negative.
    """
    if max_terms is not None and max_terms < 0:
        # A negative slice bound would silently drop terms from the *end* of
        # the ranking instead of capping it.
        raise ValueError("max_terms must be non-negative or None")

    corpus = Corpus(src_directory)
    corpus.process()

    reference_sources = ReferenceSources()
    reference_sources.read_sources()

    metrics = Metrics()
    metrics.create(corpus)

    # Select terms: rank by tfxdf score, best first, and cap once so both
    # reports work from exactly the same selection.
    ranked_terms = sorted(metrics.tfxdf, key=metrics.tfxdf.get, reverse=True)
    top_terms = ranked_terms[:max_terms]

    # Developer report: entries stay in ranking order, hence the OrderedDict.
    dev_entries = OrderedDict()
    translations = Translations()
    for term in top_terms:
        dev_entries[term] = translations.create_for_word_sorted_by_frequency(
            corpus.documents, term, reference_sources)

    dev_glossary_serializer = DevGlossarySerializer()
    dev_glossary_serializer.create(u"dev-" + glossary_file + ".html",
                                   glossary_description, corpus,
                                   dev_entries, reference_sources)

    # User report: same terms, sorted by the term itself.
    glossary = Glossary(glossary_description)
    for term in sorted(top_terms):
        # NOTE(review): translations are looked up again rather than reused
        # from dev_entries, matching the original call pattern -- confirm
        # whether the results can be shared safely before caching them.
        glossary_entry = GlossaryEntry(
            term,
            translations.create_for_word_sorted_by_frequency(
                corpus.documents, term, reference_sources),
        )
        glossary.entries.append(glossary_entry)

    user_entries = glossary.get_dict()
    process_template('templates/userglossary-html.mustache',
                     glossary_file + ".html", user_entries)
    process_template('templates/userglossary-csv.mustache',
                     glossary_file + ".csv", user_entries)

    generate_database(glossary, glossary_file)
示例2: process_projects
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import process [as 别名]
def process_projects(max_terms=1000):
    """Build the developer and user glossary outputs from module settings.

    Reads the module-level names ``src_directory``, ``glossary_file`` and
    ``glossary_description`` (they are only read here, so no ``global``
    declaration is needed). The corpus is processed, terms are ranked by
    their ``metrics.tfxdf`` score (highest first) and the top ``max_terms``
    are written to:

    * ``dev-<glossary_file>.html`` via ``DevGlossarySerializer`` (terms kept
      in ranking order),
    * the user glossary produced by ``UserGlossarySerializer`` (terms sorted
      alphabetically).

    Args:
        max_terms: Maximum number of top-ranked terms to include. ``None``
            keeps every term. Defaults to 1000, the previously hard-coded cap.

    Raises:
        ValueError: If ``max_terms`` is negative.
    """
    if max_terms is not None and max_terms < 0:
        # A negative slice bound would silently drop terms from the *end* of
        # the ranking instead of capping it.
        raise ValueError("max_terms must be non-negative or None")

    corpus = Corpus(src_directory)
    corpus.process()

    reference_sources = ReferenceSources()
    reference_sources.read_sources()

    metrics = Metrics()
    metrics.create(corpus)

    # Select terms: rank by tfxdf score, best first, and cap once so both
    # reports work from exactly the same selection.
    ranked_terms = sorted(metrics.tfxdf, key=metrics.tfxdf.get, reverse=True)
    top_terms = ranked_terms[:max_terms]

    # Developer report: entries stay in ranking order, hence the OrderedDict.
    dev_entries = OrderedDict()
    translations = Translations()
    for term in top_terms:
        dev_entries[term] = translations.create_for_word_sorted_by_frequency(
            corpus.documents, term, reference_sources)

    dev_glossary_serializer = DevGlossarySerializer()
    dev_glossary_serializer.create(u"dev-" + glossary_file + ".html",
                                   glossary_description, corpus,
                                   dev_entries, reference_sources)

    # User report: same terms, sorted by the term itself.
    glossary = Glossary()
    glossary.description = glossary_description
    for term in sorted(top_terms):
        glossary_entry = GlossaryEntry()
        glossary_entry.source_term = term
        glossary_entry.translations = (
            translations.create_for_word_sorted_by_frequency(
                corpus.documents, term, reference_sources))
        glossary.entries.append(glossary_entry)

    user_glossary_serializer = UserGlossarySerializer()
    user_glossary_serializer.create(glossary_file, glossary.get_dict(),
                                    reference_sources)