Python luigi.run方法代码示例

本文整理汇总了Python中luigi.run方法的典型用法代码示例。如果您正苦于以下问题：Python luigi.run方法的具体用法？Python luigi.run怎么用？Python luigi.run使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块luigi的用法示例。

在下文中一共展示了luigi.run方法的9个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: run

# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def run(self):
    """
    Count word occurrences in the input target and write the tallies.

    Every line read from ``self.input()`` is split on whitespace and each
    resulting word is counted. One ``word<TAB>count`` line per distinct
    word is then written to ``self.output()``.

    Both handles are managed with ``with`` so the input handle is closed
    after reading and the output handle is released even when a write
    raises.
    """
    # Local import keeps this method self-contained.
    from collections import Counter

    counts = Counter()
    with self.input().open('r') as ifp:
        for line in ifp:
            # str.split() with no argument already ignores leading and
            # trailing whitespace, so no separate strip() is needed.
            counts.update(line.split())

    with self.output().open('w') as ofp:
        for word, total in counts.items():
            ofp.write('{}\t{}\n'.format(word, total))

开发者ID:MinerKasch，项目名称:HadoopWithPython，代码行数:18，代码来源:wordcount.py

示例2: run

# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def run(self):
    """
    Dump the input corpus as ``docnum<TAB>term_id`` lines.

    The corpus is loaded with ``gensim.corpora.MmCorpus`` from the input
    target's path. Documents are fetched one at a time through the
    corpus offset index; for each document, one line per term id is
    written to the output target, followed by a terminating newline
    (a document with no terms therefore yields a single empty line).
    """
    corpus = gensim.corpora.MmCorpus(self.input().path)
    with self.output().open('w') as wf:
        for docnum, offset in enumerate(corpus.index):
            document = corpus.docbyoffset(offset)
            # Only the term ids are emitted; the second element of each
            # (term_id, value) pair is ignored.
            lines = ['%s\t%s' % (docnum, term_id) for term_id, _ in document]
            wf.write('%s\n' % '\n'.join(lines))

开发者ID:macks22，项目名称:dblp，代码行数:18，代码来源:convert.py

示例3: run

# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def run(self):
    """
    Concatenate each author's paper documents into one CSV row per author.

    ``self.input()`` must yield two targets: a paper CSV indexed by its
    first column with a ``doc`` column, and an authorship CSV indexed by
    its first column with exactly one further column (the rows are
    unpacked as ``(person_id, paper_id)`` pairs). Missing values in the
    paper table are replaced by empty strings.

    The output, written through ``util.write_csv_to_fwrapper``, has the
    header ``('author_id', 'doc')`` and one row per distinct author id,
    the author's docs being joined with ``'|'``.
    """
    paper_repdocs_file, author_file = self.input()

    with paper_repdocs_file.open() as pfile:
        paper_df = pd.read_csv(pfile, index_col=(0,))
        paper_df.fillna('', inplace=True)

    # Read out authorship records.
    with author_file.open() as afile:
        author_df = pd.read_csv(afile, header=0, index_col=(0,))

    # Initialize the repdoc dictionary from the complete list of person
    # ids so that every author seen in the index gets an entry.
    author_ids = author_df.index.unique()
    repdocs = {i: [] for i in author_ids}

    # Build up repdocs for each author.
    for person_id, paper_id in author_df.itertuples():
        doc = paper_df.loc[paper_id]['doc']
        repdocs[person_id].append(doc)

    # Save repdocs. dict.items() is used instead of iteritems(), which
    # does not exist on Python 3; items() works on both major versions.
    rows = ((person_id, '|'.join(docs))
            for person_id, docs in repdocs.items())
    util.write_csv_to_fwrapper(self.output(), ('author_id', 'doc'), rows)

开发者ID:macks22，项目名称:dblp，代码行数:26，代码来源:repdocs.py

示例4: run

# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def run(self):
    """
    Restrict papers to a year range and references to the surviving papers.

    Reads the papers CSV and the references CSV from ``self.input()`` and
    writes two CSVs (without the index column) to ``self.output()``:

    * papers whose integer ``year`` lies in ``[self.start, self.end]``
      (both bounds inclusive);
    * references whose ``paper_id`` and ``ref_id`` both belong to the
      ``id`` values of the retained papers.
    """
    papers_in, refs_in = self.input()
    papers_target, refs_target = self.output()

    with papers_in.open() as handle:
        papers_df = pd.read_csv(handle)

    # Years are compared numerically, so coerce the column first.
    papers_df['year'] = papers_df['year'].astype(int)
    years = papers_df['year']
    in_range = (years >= self.start) & (years <= self.end)
    kept_papers = papers_df[in_range]

    with papers_target.open('w') as handle:
        kept_papers.to_csv(handle, index=False)
    paper_ids = kept_papers['id'].unique()

    # References are only read after the filtered papers were saved.
    with refs_in.open() as handle:
        refs_df = pd.read_csv(handle)

    citing_kept = refs_df['paper_id'].isin(paper_ids)
    cited_kept = refs_df['ref_id'].isin(paper_ids)
    kept_refs = refs_df[citing_kept & cited_kept]

    with refs_target.open('w') as handle:
        kept_refs.to_csv(handle, index=False)

开发者ID:macks22，项目名称:dblp，代码行数:27，代码来源:filtering.py

示例5: run

# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def run(self):
    """
    Extract the giant component of the author graph and save it.

    The compressed GraphML file given by the first input target is read
    with igraph; its giant component is written both as compressed
    GraphML and as an edge list. A CSV mapping each vertex ``name``
    attribute to its vertex index in that component is written to the
    third output target with the header ``('author_id', 'node_id')``,
    rows being sorted.
    """
    author_graph_file, _ = self.input()
    graphml_outfile, edgelist_outfile, idmap_outfile = self.output()

    # Read graph, take the giant component, save as graphml and edgelist.
    author_graph = igraph.Graph.Read_GraphMLz(author_graph_file.path)
    lcc = author_graph.components().giant()
    lcc.write_graphmlz(graphml_outfile.path)
    lcc.write_edgelist(edgelist_outfile.path)

    # Build the name -> index map; a later vertex with the same name
    # overwrites an earlier one, as with any dict assignment.
    idmap = {}
    for vertex in lcc.vs:
        idmap[vertex['name']] = vertex.index

    header = ('author_id', 'node_id')
    util.write_csv_to_fwrapper(idmap_outfile, header, sorted(idmap.items()))

开发者ID:macks22，项目名称:dblp，代码行数:18，代码来源:build_graphs.py

示例6: run

# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def run(self):
    """
    Run this task's query and print a plain-text report to stdout.

    The query returned by ``self.query()`` is executed through
    ``self.run_query``. The report shows the result's ``job_id`` and
    ``size``, a tab-separated header built from the first element of
    each entry in ``result.description``, and one tab-separated line per
    result row.

    Every ``print`` call receives a single pre-formatted string, so the
    output is identical on Python 2 and Python 3 (the former ``print``
    statements were a syntax error on Python 3).
    """
    result = self.run_query(self.query())
    separator = '===================='
    print(separator)
    print("Job ID     : %s" % (result.job_id,))
    print("Result size: %s" % (result.size,))
    print("Result     :")
    print("\t".join([c[0] for c in result.description]))
    print("----")
    for row in result:
        print("\t".join([str(c) for c in row]))
    print(separator)

开发者ID:treasure-data，项目名称:luigi-td，代码行数:13，代码来源:tasks.py

示例7: pig_script_path

# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def pig_script_path(self):
    """Return the path of the Pig script to run (``self.script_path``)."""
    script = self.script_path
    return script

开发者ID:MinerKasch，项目名称:HadoopWithPython，代码行数:7，代码来源:luigi_pig.py

示例8: run

# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def run(self):
    """
    Download the Gutenberg MARC catalog, decompress it, and store it.

    Two shell commands are issued through ``shellout``: a quiet ``wget``
    of the bzip2-compressed catalog, then ``bunzip2 -c`` on the
    downloaded file. The decompressed file is finally moved to this
    task's output path.
    """
    catalog_url = "http://gutenberg.readingroo.ms/cache/generated/feeds/catalog.marc.bz2"
    # NOTE(review): shellout presumably returns the path it substituted
    # for {output} -- confirm against its documentation.
    archive = shellout('wget -q "{url}" -O {output}', url=catalog_url)
    unpacked = shellout('bunzip2 {input} -c > {output}', input=archive)
    downloaded = luigi.LocalTarget(unpacked)
    downloaded.move(self.output().path)

开发者ID:miku，项目名称:gluish，代码行数:7，代码来源:gutenberg.py

示例9: run

# 需要导入模块: import luigi [as 别名]
# 或者: from luigi import run [as 别名]
def run(self):
    """
    Quietly fetch ``self.url`` with wget and move it to the output path.

    The download is issued through ``shellout``; the resulting file is
    wrapped in a ``luigi.LocalTarget`` and moved to this task's output
    path.
    """
    # NOTE(review): shellout presumably returns the path it substituted
    # for {output} -- confirm against its documentation.
    fetched = shellout('wget -q "{url}" -O {output}', url=self.url)
    downloaded = luigi.LocalTarget(fetched)
    downloaded.move(self.output().path)

开发者ID:miku，项目名称:gluish，代码行数:6，代码来源:newspapers.py

注：本文中的luigi.run方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。