当前位置: 首页>>代码示例>>Python>>正文


Python BedTool.sequence方法代码示例

本文整理汇总了Python中pybedtools.BedTool.sequence方法的典型用法代码示例。如果您正苦于以下问题:Python BedTool.sequence方法的具体用法?Python BedTool.sequence怎么用?Python BedTool.sequence使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pybedtools.BedTool的用法示例。


在下文中一共展示了BedTool.sequence方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: calc_origin_bkgd_freqs

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
def calc_origin_bkgd_freqs(bedtool, strand, fasta_filename, verbose):
    """Compute background counts for the regions of one strand.

    Every interval of ``bedtool`` is stamped with the requested strand,
    the strand-aware sequences are extracted from ``fasta_filename`` and
    the resulting FASTA file is handed to ``calc_bkgd_counts`` with a
    fixed region size of 1.

    Parameters
    ----------
    bedtool : iterable of intervals
        BED6 intervals; each one has its ``strand`` attribute overwritten.
    strand : str
        Either ``'pos'`` (mapped to ``'+'``) or ``'neg'`` (mapped to ``'-'``).
    fasta_filename : str
        Reference FASTA passed to ``BedTool.sequence`` as ``fi``.
    verbose : bool
        When true, a progress line is written to stderr; the flag is also
        forwarded to ``calc_bkgd_counts``.

    Returns
    -------
    The value returned by ``calc_bkgd_counts``.

    Raises
    ------
    ValueError
        If ``strand`` is neither ``'pos'`` nor ``'neg'``.
    """
    # Validate up front: without the else branch an unknown strand left
    # strand_char unbound and only failed later, inside the loop.
    if strand == 'pos':
        strand_char = '+'
    elif strand == 'neg':
        strand_char = '-'
    else:
        raise ValueError("strand must be 'pos' or 'neg', got %r" % (strand,))

    intervals = []
    for row in bedtool:
        # input is BED6, output needs BED6
        row.strand = strand_char
        intervals.append(row)

    stranded_bedtool = BedTool(intervals)

    # s=True makes the extraction strand-aware.
    fastatool = stranded_bedtool.sequence(fi=fasta_filename, s=True)

    kwargs = {'region_size_min': 1,
              'region_size_max': 1,
              'ignore_chroms': [],
              'only_chroms': [],
              'verbose': verbose}

    if verbose:
        sys.stderr.write(">> calculating background freqs ...\n")

    return calc_bkgd_counts(fastatool.seqfn, **kwargs)
开发者ID:hesselberthlab,项目名称:modmap,代码行数:32,代码来源:origin_analysis.py

示例2: getNegativeDatasetFASTA

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
def getNegativeDatasetFASTA(config):
	try:
		coordinates = BedTool(config['negativesBedFile'])
		genome = BedTool(config['maize_genome_filepath'])
		dataset = coordinates.sequence(fi=genome, fo=config['negative_dataset_output'])
	except ValueError:
		print 'getNegativeDatasetFASTA; File ', config['maize_genome_filepath'], ' not found'
开发者ID:Tay2510,项目名称:PyCorn,代码行数:9,代码来源:driver.py

示例3: getPositiveDatasetFASTA

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
def getPositiveDatasetFASTA(config):
	if (not os.path.isfile(config['positive_dataset_output'])):
		try:
			coordinates = BedTool(config['bed_file_post'])
			genome = BedTool(config['maize_genome_filepath'])
			dataset = coordinates.sequence(fi=genome, fo=config['positive_dataset_output'])
		except ValueError:
			print 'getPositiveDatasetFASTA; File ', config['maize_genome_filepath'], ' not found'
开发者ID:Tay2510,项目名称:PyCorn,代码行数:10,代码来源:driver.py

示例4: getCDSs

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
def getCDSs(bedfilename, reffilename, strand):
    """
    Return an iterator over the coding sequences found on one strand.

    The features of ``bedfilename`` are restricted to those whose strand
    equals ``strand``, their strand-aware sequences are extracted from
    ``reffilename`` and the resulting FASTA file is parsed with
    ``SeqIO.parse``.
    """
    def on_requested_strand(feature):
        return feature.strand == strand

    stranded = BedTool(bedfilename).filter(on_requested_strand)
    extracted = stranded.sequence(fi=reffilename, s=True)
    return SeqIO.parse(extracted.seqfn, "fasta")
开发者ID:adamjorr,项目名称:zypy,代码行数:11,代码来源:degenerate.py

示例5: folding_analysis

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
def folding_analysis(bedfilename, fastafilename, verbose):
    """Fold the sequence of every region listed in a BED file.

    Parameters
    ----------
    bedfilename : str
        BED file with the regions to fold.
    fastafilename : str
        Reference FASTA the region sequences are extracted from.
    verbose : bool
        Accepted for interface compatibility; not used in this body.

    Notes
    -----
    The per-base loop only looks up the structure character of each
    position; nothing is returned or written by the code shown here.
    """
    bedtool = BedTool(bedfilename)

    # The reference must be passed as `fi`, and one extraction covers all
    # regions, so it is done once rather than once per region.
    fasta = bedtool.sequence(fi=fastafilename)

    # NOTE(review): assumes each record's sequence sits on a single line,
    # as bedtools getfasta writes it -- confirm for the bedtools in use.
    with open(fasta.seqfn) as handle:
        for line in handle:
            region_seq = line.strip()
            if not region_seq or region_seq.startswith('>'):
                # Skip FASTA headers and blank lines; only sequences fold.
                continue

            struct, mfe = RNA.fold(region_seq)

            for pos, nuc in enumerate(region_seq):
                struct_char = struct[pos]
开发者ID:hesselberthlab,项目名称:5OH,代码行数:13,代码来源:folding_analysis.py

示例6: prepareUTRs

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
def prepareUTRs(bed, bed12, bed12Fasta, referenceFasta, readLength, polyALength, explv, snpRate, vcfFile):
    """Prepare simulated UTR sequences, a BED12 fragment file and an expression profile.

    Steps, in order:

    1. Parse the UTRs of ``bed`` and write a VCF header to ``vcfFile``.
    2. Extract the strand-aware, named UTR sequences from
       ``referenceFasta`` and write them to ``bed12Fasta``, passing every
       sequence line through ``simulateUTR`` (which also receives the open
       VCF handle).
    3. For every UTR draw a fragment length in [150, 450), cap it at the
       UTR length and write the fragment at the UTR's end to ``bed12``.
    4. Run ``genexplvprofile.py`` on ``bed12``, redirecting its output to
       ``explv``.

    Parameters
    ----------
    bed : str
        Input BED file with the UTRs.
    bed12 : str
        Output path for the BED12 fragment file.
    bed12Fasta : str
        Output path for the simulated UTR FASTA.
    referenceFasta : str
        Reference FASTA used for sequence extraction.
    readLength : int
        Not used by the active code (only referenced in disabled comments).
    polyALength, snpRate
        Forwarded unchanged to ``simulateUTR``.
    explv : str
        Output path the expression profile is redirected to.
    vcfFile : str
        Output path for the VCF.

    Returns
    -------
    int
        Sum of the lengths of all fragments written to ``bed12``.
    """
    # Read utrs from BED file
    utrs = parseUtrBedFile(bed)

    # Context managers close both outputs even if the simulation fails.
    with open(vcfFile, "w") as vcf:
        print("##fileformat=VCFv4.1", file=vcf)
        print("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", file=vcf)

        bedFile = BedTool(bed)

        bedFasta = bedFile.sequence(fi=referenceFasta, s=True, name=True)

        with open(bed12Fasta, "w") as bed12FastaFile:
            utrName = None
            for line in bedFasta.print_sequence().splitlines():
                if not line:
                    # Guard against blank lines instead of failing on line[0].
                    continue
                if line.startswith(">"):
                    print(line, file=bed12FastaFile)
                    utrName = line[1:]
                else:
                    print(simulateUTR(line, utrs[utrName], polyALength, snpRate, vcf), file=bed12FastaFile)

    totalLength = 0

    minFragmentLength = 150
    maxFragmentLength = 450

    with open(bed12, "w") as bed12File:
        for utr in BedIterator(bed):

            fragmentLength = random.randrange(minFragmentLength, maxFragmentLength, 1)
            fragmentLength = min(fragmentLength, utr.getLength())

            # The fragment is anchored at the end of the UTR.
            start = max(0, utr.getLength() - fragmentLength)
            end = utr.getLength()

            totalLength += (end - start)
            print(utr.name, start, end, utr.name, utr.score, "+", start, end, "255,0,0", "1", (end - start), 0, sep="\t", file=bed12File)

    output = shell(getBinary("genexplvprofile.py") + " --geometric 1 " + bed12 + " 2> /dev/null > " + explv)
    if len(output.strip()) > 5:
        print(output)

    return totalLength
开发者ID:t-neumann,项目名称:slamdunk,代码行数:51,代码来源:simulator.py

示例7: needle

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
def needle(chrom, start, end, name, score, strand):
    """Align one repeat element against its reference with EMBOSS needle.

    The strand-aware sequence of the interval is extracted from the
    module-level ``genome`` and globally aligned against the module-level
    reference chosen by ``name``: ``MIRb``, ``MIRc``, ``MIR3``, ``MIR``,
    or ``ALU`` for any name containing ``"Alu"``.

    Parameters
    ----------
    chrom, start, end, name, score, strand
        The six BED6 fields of the element.

    Returns
    -------
    int
        End offset of the first match of the module-level ``char`` pattern
        in the second row of the alignment, or 0 when ``name`` matches no
        known reference.
    """
    item = BedTool([(chrom, start, end, name, score, strand)])
    item = item.sequence(fi=genome, s=True)
    with open(item.seqfn) as handle:
        # Second line of the FASTA file holds the sequence itself.
        temp = handle.read().split('\n')[1]

    # Branches stay lazy so only the reference actually needed is looked up.
    if name == "MIRb":
        reference = MIRb
    elif name == "MIRc":
        reference = MIRc
    elif name == "MIR3":
        reference = MIR3
    elif name == "MIR":
        reference = MIR
    elif "Alu" in name:
        reference = ALU
    else:
        return 0

    needle_cline = NeedleCommandline(asequence="asis:" + reference,
                                     bsequence="asis:" + temp,
                                     gapopen=10, gapextend=0.5,
                                     outfile='stdout')
    child = subprocess.Popen(str(needle_cline),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             shell=(sys.platform != "win32"))
    # Read stdout before waiting: wait() on a process with a full stdout
    # pipe blocks forever.
    align = AlignIO.read(child.stdout, "emboss")
    child.wait()

    return char.search(str(align[1, :].seq)).end()
开发者ID:LordStriker,项目名称:SINEs,代码行数:43,代码来源:SINEs_find.py

示例8: extract_fasta

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
    def extract_fasta(self, bed_in, fasta_in, fasta_out=None):
        """Extract the sequences of BED regions into an upper-cased FASTA file

        Parameters
        ----------
        bed_in: string
            Path to input bed
        fasta_in: string
            Path to the reference fasta file (made absolute before use)

        fasta_out: string
            Path to write extracted fasta sequence. Required in practice:
            the default of None is rejected.

        Returns
        -------
        None
            The result is written to ``fasta_out``.

        Raises
        ------
        ValueError
            If ``fasta_out`` is not given.
        """
        # Fail before any extraction work instead of crashing in
        # os.path.abspath(None) after the temporary FASTA already exists.
        if fasta_out is None:
            raise ValueError('fasta_out is required: no output path was given')

        bed = BedTool(bed_in)
        extracted_fasta = bed.sequence(fi=os.path.abspath(fasta_in))
        temp_fasta = extracted_fasta.seqfn
        try:
            make_uppercase_fasta(temp_fasta, os.path.abspath(fasta_out))
        finally:
            # Remove the intermediate file even when the conversion fails.
            os.remove(temp_fasta)
开发者ID:saketkc,项目名称:moca,代码行数:25,代码来源:model.py

示例9: open

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
import sys
import argparse
from pybedtools import BedTool

parser = argparse.ArgumentParser(description='get fasta from bed files and count ATCG percentage')
parser.add_argument('-b', '--bed', required=True, help='input bed file')
parser.add_argument('-f', '--fasta', required=True, help='fasta file')
args = parser.parse_args()


with open(args.bed) as bedFile:
    fasta = BedTool(args.fasta)
    for line in bedFile:
        bedline = BedTool(line, from_string=True)
        get_fasta = bedline.sequence(fi=fasta, split=True, s=True)
        seq =  (open(get_fasta.seqfn).read()).split('\n')[1]
        #print seq
        seq = seq.upper()
        countA = float(seq.count('A'))
        countT = float(seq.count('T'))
        countC = float(seq.count('C'))
        countG = float(seq.count('G'))
        seq_len= len(seq)
        percentageA = countA / seq_len 
        percentageT = countT / seq_len
        percentageC = countC / seq_len
        percentageG = countG / seq_len
        print line.split()[3],'\t',percentageA,'\t',percentageT,'\t',percentageC,'\t',percentageG
        
开发者ID:CrescentLuo,项目名称:Amphisbaena,代码行数:30,代码来源:ATCG.py

示例10: Bedfile

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
class Bedfile(object):
    """Class to crate a bed file object
    Parameters
    ----------
    filepath: string
        Absolute path to bedfile

    genome_table: string
        Absolute path to geonme chromosome size file
    """
    def __init__(self, filepath, genome_table):
        """Load a bed file, detect its format and wrap it in a BedTool

        Parameters
        ----------
        filepath: string
            Path to the bed file; must exist
        genome_table: string
            Path to the genome chromosome size file

        Raises
        ------
        MocaException
            If ``filepath`` is not an existing file, or if reading or
            format detection fails.
        """
        self.filepath = filepath
        self.bed_format = None
        if not os.path.isfile(filepath):
            raise MocaException('Bed file {} not found'.format(self.filepath))
        self._read()
        self.bed_format = self.guess_bedformat()
        # NOTE(review): sort_bed is not visible in this excerpt -- confirm
        # whether it rewrites the file that BedTool opens next.
        self.sort_bed()
        self.bed = BedTool(filepath)
        self.genome_table = genome_table
        # Attribute is spelled `bed_format`; `bed_Format` raised
        # AttributeError on every construction.
        assert self.bed_format is not None

    def _read(self):
        """Read ``self.filepath`` into ``self.bed_df`` as a header-less table

        Raises
        ------
        MocaException
            If pandas fails to read the file for any reason
        """
        try:
            frame = pandas.read_table(self.filepath, header=None)
        except Exception as e:
            raise MocaException('Error reading bed file {}'.format(self.filepath),
                                'Traceback: {}'.format(e))
        else:
            self.bed_df = frame

    def guess_bedformat(self):
        """Method to guess bed format from the number of columns

        The column count of ``self.bed_df`` is looked up in
        ``__BED_TYPES__``. As a side effect ``self.bed_columns`` is set to
        the dataframe's columns.

        Returns
        -------
        bed_format: string
            BED format

        Raises
        ------
        MocaException
            If the column count is not a key of ``__BED_TYPES__``

        Example:
            >>> bed_df = Bedfile('file.bed', 'genome.sizes')
            >>> print(bed_df.guess_bedformat())

        """
        self.bed_columns = self.bed_df.columns
        count = len(self.bed_columns)
        try:
            bed_format = __BED_TYPES__[count]
        except KeyError:
            # Fill in the placeholders; the message used to be raised with
            # the literal '{}' markers still in it.
            raise MocaException(
                'Bed file had {} columns. Supported column lengths are {}'.format(
                    count, sorted(__BED_TYPES__)))
        return bed_format

    def slop_bed(self, flank_length=5):
        """Add flanking sequences to bed file
        Parameters
        ----------
        flank_length: int
            the bed region is expanded in both direction by flank_length number of bases
        Returns
        -------
        slop_bed: object
            Whatever ``self.bed.slop`` returns for the expanded regions;
            ``self.bed`` itself is not reassigned here
        """
        # Return the result: it used to be computed and thrown away, so
        # callers had no way to reach the expanded regions.
        return self.bed.slop(g=self.genome_table,
                             b=flank_length
                             )

    def convert_to_scorefile(self):
        # NOTE(review): the only statement in this method is the string
        # literal below. It holds what looks like disabled code (writing
        # chrom / peak_positions / score columns to a '.sorted' file), so
        # calling the method currently does nothing.

        """
        filename, file_extension = os.path.splitext(self.filepath)
        filename += '.sorted'
        self.bed_df.to_csv(filename+file_extension,
                           sep='\t',
                           columns=['chrom', 'peak_positions', 'score'],
                           index=False,
                           header=False)
        """
    # NOTE(review): the statements below sit at class-body indentation, not
    # inside any method, and use `filetype` and `df`, neither of which is
    # defined in the visible class scope. They look like the body of a
    # method whose `def` line was lost -- confirm against the upstream file.
    #
    # For 'narrowPeak' input: rows whose `peak` equals -1 get the integer
    # midpoint of chromStart + chromEnd as peak position; all other rows get
    # chromStart + peak. The two subsets are then concatenated again.
    if filetype=='narrowPeak':
        filter_df1 = df[df.peak.astype(int)==-1]
        filter_df2 = df[df.peak.astype(int)!=-1]
        filter_df1['peak_positions'] = (filter_df1['chromStart'].astype(int)+filter_df1['chromEnd'].astype(int))
        filter_df1['peak_positions'] = [int(x/2) for x in filter_df1['peak_positions'].astype(int)]
        filter_df2['peak_positions'] = filter_df2['chromStart'].astype(int)+filter_df2['peak'].astype(int)
        df = pandas.concat([filter_df1, filter_df2])
    else:
        # Any other file type: every row gets the integer midpoint of
        # chromStart + chromEnd.
        df['peak_positions'] = (df['chromStart']+df['chromEnd'])
        df['peak_positions'] = [int(x/2) for x in df['peak_positions'].astype(int)]



    def extract_fasta(self, fasta_file):
        """Extract fasta of bed regions
        Parameters
        ----------
        fasta_file: string
            Absolute path to location of fasta file
        Returns
        -------
        fasta: string
            Fasta sequence combined
        """
#.........这里部分代码省略.........
开发者ID:saketkc,项目名称:moca_web,代码行数:103,代码来源:model.py


注:本文中的pybedtools.BedTool.sequence方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。