本文整理汇总了 Python 中 luigi.run 方法的典型用法代码示例。如果您正苦于以下问题：Python luigi.run 方法的具体用法？Python luigi.run 怎么用？Python luigi.run 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 luigi 的用法示例。
在下文中一共展示了luigi.run方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: run
# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def run(self):
    """
    Count word occurrences in the input target and write them out.

    Each input line is stripped and split on whitespace. For every distinct
    word, one ``word<TAB>count`` line is written to the output target, in
    the order the words were first seen.
    """
    count = {}
    # Context managers close both handles even when reading or writing
    # raises; previously the input handle was never closed at all.
    with self.input().open('r') as ifp:
        for line in ifp:
            for word in line.strip().split():
                count[word] = count.get(word, 0) + 1
    with self.output().open('w') as ofp:
        for k, v in count.items():
            ofp.write('{}\t{}\n'.format(k, v))
示例2: run
# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def run(self):
    """
    Dump the input corpus as ``docnum<TAB>term_id`` lines.

    The corpus is loaded with ``gensim.corpora.MmCorpus`` from the input
    target's path. Documents are read one at a time by offset, and every
    term id of a document is written on its own line, prefixed with the
    document's position in ``corpus.index``. A document without terms
    produces a single empty line.
    """
    corpus = gensim.corpora.MmCorpus(self.input().path)
    with self.output().open('w') as sink:
        for docnum, offset in enumerate(corpus.index):
            document = corpus.docbyoffset(offset)
            # Only the term id of each (term_id, weight) pair is kept.
            lines = ['%s\t%s' % (docnum, term_id) for term_id, _ in document]
            sink.write('%s\n' % '\n'.join(lines))
示例3: run
# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def run(self):
    """
    Build one representative-document ("repdoc") row per author.

    Reads the paper repdocs CSV and the authorship CSV from the two input
    targets, collects the ``doc`` value of every paper listed for each
    author, and writes ``(author_id, doc)`` rows to the output target with
    an author's docs joined by ``|``.
    """
    paper_repdocs_file, author_file = self.input()

    with paper_repdocs_file.open() as pfile:
        paper_df = pd.read_csv(pfile, index_col=(0,))
        # Missing values become empty strings so the join below never
        # meets a NaN.
        paper_df.fillna('', inplace=True)

    # read out authorship records
    with author_file.open() as afile:
        author_df = pd.read_csv(afile, header=0, index_col=(0,))

    # initialize repdoc dictionary from complete list of person ids
    author_ids = author_df.index.unique()
    repdocs = {i: [] for i in author_ids}

    # build up repdocs for each author
    # NOTE(review): the two-name unpacking assumes the authorship file has
    # exactly one column besides the index -- confirm against the data.
    for person_id, paper_id in author_df.itertuples():
        doc = paper_df.loc[paper_id]['doc']
        repdocs[person_id].append(doc)

    # save repdocs
    # dict.items() works on Python 2 and 3; dict.iteritems() exists only on
    # Python 2 and raises AttributeError on Python 3.
    rows = ((person_id, '|'.join(docs))
            for person_id, docs in repdocs.items())
    util.write_csv_to_fwrapper(self.output(), ('author_id', 'doc'), rows)
示例4: run
# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def run(self):
    """
    Restrict the paper and reference tables to a range of years.

    Papers whose ``year`` lies between ``self.start`` and ``self.end``
    (both inclusive) are written to the first output target. References
    are kept only when both ``paper_id`` and ``ref_id`` belong to a kept
    paper, and are written to the second output target.
    """
    papers_source, refs_source = self.input()
    papers_target, refs_target = self.output()

    with papers_source.open() as handle:
        papers = pd.read_csv(handle)

    # Compare years as integers.
    papers['year'] = papers['year'].astype(int)
    not_too_old = papers['year'] >= self.start
    not_too_new = papers['year'] <= self.end
    kept_papers = papers[not_too_old & not_too_new]

    with papers_target.open('w') as handle:
        kept_papers.to_csv(handle, index=False)

    # A reference survives only if both of its endpoints survived.
    kept_ids = kept_papers['id'].unique()
    with refs_source.open() as handle:
        refs = pd.read_csv(handle)
    citing_is_kept = refs['paper_id'].isin(kept_ids)
    cited_is_kept = refs['ref_id'].isin(kept_ids)
    kept_refs = refs[citing_is_kept & cited_is_kept]

    with refs_target.open('w') as handle:
        kept_refs.to_csv(handle, index=False)
示例5: run
# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def run(self):
    """
    Extract the giant component of the author graph and save it.

    The graph is read from the first input target (compressed GraphML).
    Its giant component is written both as compressed GraphML and as an
    edge list, and a CSV mapping each vertex ``name`` to its index in the
    component is written to the third output target, sorted by name.
    """
    graphml_target, edgelist_target, idmap_target = self.output()
    graph_source, _ = self.input()

    # Load the full graph and keep only its giant component.
    author_graph = igraph.Graph.Read_GraphMLz(graph_source.path)
    giant = author_graph.components().giant()
    giant.write_graphmlz(graphml_target.path)
    giant.write_edgelist(edgelist_target.path)

    # Map vertex name -> index within the giant component. Going through
    # a dict means a repeated name keeps only its last index.
    name_to_index = dict(
        (vertex['name'], vertex.index) for vertex in giant.vs)
    ordered_pairs = sorted(name_to_index.items())
    util.write_csv_to_fwrapper(
        idmap_target, ('author_id', 'node_id'), ordered_pairs)
示例6: run
# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def run(self):
    """
    Run the task's query and print the result to standard output.

    The query returned by ``self.query()`` is executed through
    ``self.run_query``. The job id, the result size, a tab-separated header
    built from the first element of each ``result.description`` entry, and
    every result row (tab-separated) are then printed between two ruler
    lines.
    """
    result = self.run_query(self.query())
    # Every print() call receives exactly one pre-formatted string, so the
    # emitted text is the same whether print is a statement (Python 2) or a
    # function (Python 3). The former print statements were a SyntaxError
    # on Python 3.
    print('====================')
    print("Job ID : {}".format(result.job_id))
    print("Result size: {}".format(result.size))
    print("Result :")
    print("\t".join([c[0] for c in result.description]))
    print("----")
    for row in result:
        print("\t".join([str(c) for c in row]))
    print('====================')
示例7: pig_script_path
# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def pig_script_path(self):
    """
    Return the location of the pig script this task runs.

    The value is read from the ``script_path`` attribute.
    """
    script_location = self.script_path
    return script_location
示例8: run
# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def run(self):
    """
    Download the Gutenberg MARC catalog, decompress it and publish it.

    The bz2-compressed catalog is fetched with ``wget`` and unpacked with
    ``bunzip2``; the result is then moved to this task's output path.
    """
    catalog_url = "http://gutenberg.readingroo.ms/cache/generated/feeds/catalog.marc.bz2"
    # NOTE(review): shellout presumably returns the path it substituted for
    # {output} -- confirm against the helper's documentation.
    compressed = shellout('wget -q "{url}" -O {output}', url=catalog_url)
    decompressed = shellout('bunzip2 {input} -c > {output}', input=compressed)
    destination = self.output().path
    luigi.LocalTarget(decompressed).move(destination)
示例9: run
# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def run(self):
    """
    Quietly fetch ``self.url`` with wget and move the file to the output path.
    """
    # NOTE(review): shellout presumably returns the path it substituted for
    # {output} -- confirm against the helper's documentation.
    downloaded = shellout('wget -q "{url}" -O {output}', url=self.url)
    destination = self.output().path
    luigi.LocalTarget(downloaded).move(destination)