当前位置: 首页>>代码示例>>Python>>正文


Python Bed.getNumColumns方法代码示例

本文整理汇总了Python中CGAT.Bed.getNumColumns方法的典型用法代码示例。如果您正苦于以下问题:Python Bed.getNumColumns方法的具体用法?Python Bed.getNumColumns怎么用?Python Bed.getNumColumns使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在CGAT.Bed的用法示例。


在下文中一共展示了Bed.getNumColumns方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: aggregateWindowsReadCounts

# 需要导入模块: from CGAT import Bed [as 别名]
# 或者: from CGAT.Bed import getNumColumns [as 别名]
def aggregateWindowsReadCounts(infiles,
                               outfile,
                               regex="(.*)\..*"):
    '''aggregate several results from coverageBed
    into a single file.

    *regex* is used to extract the track name from the filename.
    The default removes any suffix.

    coverageBed outputs the following columns:
    1 Contig
    2 Start
    3 Stop
    4 Name
    5 The number of features in A that overlapped (by at least one
      base pair) the B interval.
    6 The number of bases in B that had non-zero coverage from features in A.
    7 The length of the entry in B.
    8 The fraction of bases in B that had non-zero coverage from
      features in A.

    For bed: use column 5
    For bed6: use column 7
    For bed12: use column 13

    Windows without any counts will not be output.
    '''

    # get bed format
    bed_columns = Bed.getNumColumns(infiles[0])
    # +1 as awk is 1-based
    column = bed_columns - 4 + 1

    src = " ".join(['''<( zcat %s |
              awk '{printf("%%s:%%i-%%i\\t%%i\\n", $1,$2,$3,$%s );}' ) ''' %
                    (x, column) for x in infiles])
    tmpfile = P.getTempFilename(".")
    statement = '''paste %(src)s > %(tmpfile)s'''
    P.run()

    # build track names
    tracks = [re.search(regex, os.path.basename(x)).groups()[0]
              for x in infiles]

    outf = IOTools.openFile(outfile, "w")
    outf.write("interval_id\t%s\n" % "\t".join(tracks))

    for line in open(tmpfile, "r"):
        data = line[:-1].split("\t")
        genes = list(set([data[x] for x in range(0, len(data), 2)]))
        values = [int(data[x]) for x in range(1, len(data), 2)]
        if sum(values) == 0:
            continue
        assert len(genes) == 1, \
            "paste command failed, wrong number of genes per line: '%s'" % line
        outf.write("%s\t%s\n" % (genes[0], "\t".join(map(str, values))))

    outf.close()

    os.unlink(tmpfile)
开发者ID:BioXiao,项目名称:CGATPipelines,代码行数:62,代码来源:PipelineWindows.py

示例2: aggregateWindowsReadCounts

# 需要导入模块: from CGAT import Bed [as 别名]
# 或者: from CGAT.Bed import getNumColumns [as 别名]
def aggregateWindowsReadCounts( infiles, outfile ):
    '''aggregate tag counts for each window.

    coverageBed outputs the following columns:
    1) Contig
    2) Start
    3) Stop
    4) Name
    5) The number of features in A that overlapped (by at least one base pair) the B interval.
    6) The number of bases in B that had non-zero coverage from features in A.
    7) The length of the entry in B.
    8) The fraction of bases in B that had non-zero coverage from features in A.

    For bed: use column 5
    For bed6: use column 7
    For bed12: use column 13

    Tiles with no counts will not be output.
    '''
    
    to_cluster = True

    # get bed format
    bed_columns = Bed.getNumColumns( infiles[0] )
    # +1 as awk is 1-based
    column = bed_columns - 4 + 1

    src = " ".join( [ '''<( zcat %s | awk '{printf("%%s:%%i-%%i\\t%%i\\n", $1,$2,$3,$%s );}' ) ''' % (x,column) for x in infiles] )
    tmpfile = P.getTempFilename( "." )
    statement = '''paste %(src)s > %(tmpfile)s'''
    P.run()
    
    tracks = [ re.sub( "\..*", '', os.path.basename(x) ) for x in infiles ]

    outf = IOTools.openFile( outfile, "w")
    outf.write( "interval_id\t%s\n" % "\t".join( tracks ) )
    
    for line in open( tmpfile, "r" ):
        data = line[:-1].split("\t")
        genes = list(set([ data[x] for x in range(0,len(data), 2 ) ]))
        values = [ int(data[x]) for x in range(1,len(data), 2 ) ]
        if sum(values) == 0: continue
        assert len(genes) == 1, "paste command failed, wrong number of genes per line: '%s'" % line
        outf.write( "%s\t%s\n" % (genes[0], "\t".join(map(str, values) ) ) )
    
    outf.close()

    os.unlink(tmpfile)
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:50,代码来源:PipelineWindows.py

示例3: aggregateWindowsTagCounts

# 需要导入模块: from CGAT import Bed [as 别名]
# 或者: from CGAT.Bed import getNumColumns [as 别名]
def aggregateWindowsTagCounts(infiles,
                              outfile,
                              regex="(.*)\..*"):
    '''aggregate output from several ``bedtools coverage`` results.

    ``bedtools coverage`` outputs the following columns for a bed4
    file::

    1 Contig
    2 Start
    3 Stop
    4 Name
    5 The number of features in A that overlapped (by at least one
      base pair) the B interval.
    6 The number of bases in B that had non-zero coverage from features in A.
    7 The length of the entry in B.
    8 The fraction of bases in B that had non-zero coverage from
      features in A.

    This method autodetects the number of columns in the :term:`infiles`
    and selects:

    * bed4: use column 5
    * bed6: use column 7
    * bed12: use column 13

    Arguments
    ---------
    infiles : list
        Input filenames with the output from ``bedtools coverage``
    outfile : string
        Output filename in :term:`tsv` format.
    regex : string
        Regular expression used to extract the track name from the
        filename.  The default removes any suffix.

    '''

    # get bed format
    bed_columns = Bed.getNumColumns(infiles[0])
    # +1 as awk is 1-based
    column = bed_columns - 4 + 1

    src = " ".join(["""<( zcat %s |
              awk '{printf("%%s:%%i-%%i\\t%%i\\n", $1,$2,$3,$%s );}')""" %
                    (x, column) for x in infiles])
    tmpfile = P.getTempFilename(".")
    statement = '''paste %(src)s > %(tmpfile)s'''
    P.run()

    # build track names
    tracks = [re.search(regex, os.path.basename(x)).groups()[0]
              for x in infiles]

    outf = IOTools.openFile(outfile, "w")
    outf.write("interval_id\t%s\n" % "\t".join(tracks))

    # filter for uniqueness - keys with the same value as the
    # previous line will be ignored.
    last_gene = None
    c = E.Counter()
    for line in open(tmpfile, "r"):
        c.input += 1
        data = line[:-1].split("\t")
        genes = list(set([data[x] for x in range(0, len(data), 2)]))
        values = [int(data[x]) for x in range(1, len(data), 2)]

        assert len(genes) == 1, \
            "paste command failed, wrong number of genes per line: '%s'" % line
        if genes[0] == last_gene:
            c.duplicates += 1
            continue
        c.output += 1
        outf.write("%s\t%s\n" % (genes[0], "\t".join(map(str, values))))
        last_gene = genes[0]

    outf.close()

    os.unlink(tmpfile)

    E.info("aggregateWindowsTagCounts: %s" % c)
开发者ID:gjaime,项目名称:CGATPipelines,代码行数:83,代码来源:PipelineWindows.py


注:本文中的CGAT.Bed.getNumColumns方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。