本文整理汇总了 Python 中 pybedtools.BedTool.sequence 方法的典型用法代码示例。如果您正苦于以下问题：Python BedTool.sequence 方法的具体用法？Python BedTool.sequence 怎么用？Python BedTool.sequence 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pybedtools.BedTool 的用法示例。
在下文中一共展示了BedTool.sequence方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: calc_origin_bkgd_freqs
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
def calc_origin_bkgd_freqs(bedtool, strand, fasta_filename, verbose):
    """Compute background counts for the intervals of ``bedtool`` on one strand.

    Every interval is stamped with the requested strand, the strand-aware
    sequences are extracted with ``BedTool.sequence`` and the resulting FASTA
    file is handed to ``calc_bkgd_counts``.

    Parameters
    ----------
    bedtool : iterable
        Intervals whose ``strand`` attribute can be assigned. The rows are
        modified in place (their strand is overwritten).
    strand : str
        Either ``'pos'`` (mapped to ``'+'``) or ``'neg'`` (mapped to ``'-'``).
    fasta_filename : str
        Reference passed to ``BedTool.sequence`` as ``fi``.
    verbose : bool
        When true, a progress line is written to stderr; the flag is also
        forwarded to ``calc_bkgd_counts``.

    Returns
    -------
    The value returned by ``calc_bkgd_counts``.

    Raises
    ------
    ValueError
        If ``strand`` is neither ``'pos'`` nor ``'neg'``.
    """
    strand_chars = {'pos': '+', 'neg': '-'}
    # Validate first: previously an unknown strand left `strand_char` unbound
    # and the failure only surfaced as a NameError inside the loop.
    if strand not in strand_chars:
        raise ValueError(
            "strand must be 'pos' or 'neg', got %r" % (strand,))
    strand_char = strand_chars[strand]

    # add strand to bedtool
    intervals = []
    for row in bedtool:
        # input is BED6, output needs BED6
        row.strand = strand_char
        intervals.append(row)

    stranded_bedtool = BedTool(intervals)
    fastatool = stranded_bedtool.sequence(fi=fasta_filename, s=True)

    kwargs = {'region_size_min': 1,
              'region_size_max': 1,
              'ignore_chroms': [],
              'only_chroms': [],
              'verbose': verbose}

    if verbose:
        # write() behaves the same on Python 2 and 3, unlike `print >>`.
        sys.stderr.write(">> calculating background freqs ...\n")

    return calc_bkgd_counts(fastatool.seqfn, **kwargs)
示例2: getNegativeDatasetFASTA
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
def getNegativeDatasetFASTA(config):
    """Write the FASTA sequences of the negative-set BED intervals.

    Reads ``config['negativesBedFile']`` (intervals) and
    ``config['maize_genome_filepath']`` (reference) and asks
    ``BedTool.sequence`` to write the result to
    ``config['negative_dataset_output']``.

    A ``ValueError`` raised while opening the inputs or extracting the
    sequences is reported on stdout instead of being propagated, as in the
    original best-effort behaviour. Returns ``None``.
    """
    bed_path = config['negativesBedFile']
    genome_path = config['maize_genome_filepath']
    try:
        coordinates = BedTool(bed_path)
        genome = BedTool(genome_path)
        coordinates.sequence(fi=genome, fo=config['negative_dataset_output'])
    except ValueError as err:
        # Either input can trigger the error, so report both paths and the
        # underlying message instead of always blaming the genome file.
        print('getNegativeDatasetFASTA; could not extract sequences '
              '(bed: %s, genome: %s): %s' % (bed_path, genome_path, err))
示例3: getPositiveDatasetFASTA
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
def getPositiveDatasetFASTA(config):
    """Write the FASTA sequences of the positive-set BED intervals.

    Nothing is done when ``config['positive_dataset_output']`` already exists
    as a file. Otherwise ``config['bed_file_post']`` (intervals) and
    ``config['maize_genome_filepath']`` (reference) are passed to
    ``BedTool.sequence``, which writes to the output path.

    A ``ValueError`` raised while opening the inputs or extracting the
    sequences is reported on stdout instead of being propagated, as in the
    original best-effort behaviour. Returns ``None``.
    """
    output_path = config['positive_dataset_output']
    if os.path.isfile(output_path):
        return

    bed_path = config['bed_file_post']
    genome_path = config['maize_genome_filepath']
    try:
        coordinates = BedTool(bed_path)
        genome = BedTool(genome_path)
        coordinates.sequence(fi=genome, fo=output_path)
    except ValueError as err:
        # Either input can trigger the error, so report both paths and the
        # underlying message instead of always blaming the genome file.
        print('getPositiveDatasetFASTA; could not extract sequences '
              '(bed: %s, genome: %s): %s' % (bed_path, genome_path, err))
示例4: getCDSs
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
def getCDSs(bedfilename, reffilename, strand):
    """Return an iterator over the coding sequences on one strand.

    The BED records in ``bedfilename`` are restricted to those whose strand
    equals ``strand``; their strand-aware sequences are extracted from
    ``reffilename`` and the resulting FASTA file is parsed with
    ``SeqIO.parse``.
    """
    def on_requested_strand(feature):
        return feature.strand == strand

    stranded = BedTool(bedfilename).filter(on_requested_strand)
    extracted = stranded.sequence(fi=reffilename, s=True)
    return SeqIO.parse(extracted.seqfn, "fasta")
示例5: folding_analysis
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
def folding_analysis(bedfilename, fastafilename, verbose):
# Walk over the regions of the BED file `bedfilename`, fold a sequence with
# RNA.fold and look up the structure character at every position.
# NOTE(review): the excerpt stops after the inner loop body, so what the
# function accumulates or returns cannot be seen here. In the visible lines
# `fastafilename`, `verbose`, `region`, `mfe`, `nuc` and `struct_char` are
# never read.
bedtool = BedTool(bedfilename)
for region in bedtool:
# NOTE(review): sequence() is called on the whole BedTool, without an `fi`
# argument, and its result is then used like a string below. Presumably the
# sequence of the current `region` taken from `fastafilename` was intended --
# confirm against the full source.
region_seq = bedtool.sequence()
# RNA.fold's result is unpacked into a structure and a second value named mfe.
struct, mfe = RNA.fold(region_seq)
for pos, nuc in enumerate(region_seq):
struct_char = struct[pos]
示例6: prepareUTRs
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
def prepareUTRs(bed, bed12, bed12Fasta, referenceFasta, readLength, polyALength, explv, snpRate, vcfFile):
    """Write the simulated UTR FASTA, its VCF, a BED12 file and a profile.

    Parameters
    ----------
    bed : path of the input BED file with the UTRs.
    bed12 : path of the BED12 file to write (one record per UTR).
    bed12Fasta : path of the FASTA file receiving the simulated sequences.
    referenceFasta : reference passed to ``BedTool.sequence`` as ``fi``.
    readLength : not used by the active code (only by commented-out terms).
    polyALength, snpRate : forwarded to ``simulateUTR``.
    explv : path receiving the stdout of ``genexplvprofile.py``.
    vcfFile : path of the VCF file; a header is written here and the handle
        is passed to ``simulateUTR``.

    Returns
    -------
    int
        Sum of ``end - start`` over all records written to ``bed12``.
    """
    # Read utrs from BED file
    utrs = parseUtrBedFile(bed)

    # `with` guarantees the handles are closed even if simulateUTR raises.
    with open(vcfFile, "w") as vcf:
        print("##fileformat=VCFv4.1", file=vcf)
        print("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", file=vcf)

        bedFasta = BedTool(bed).sequence(fi=referenceFasta, s=True, name=True)

        with open(bed12Fasta, "w") as bed12FastaFile:
            utrName = None
            for line in bedFasta.print_sequence().splitlines():
                if not line:
                    # An empty line carries neither a header nor a sequence;
                    # indexing line[0] used to raise IndexError here.
                    continue
                if line.startswith(">"):
                    print(line, file=bed12FastaFile)
                    utrName = line[1:]
                else:
                    print(simulateUTR(line, utrs[utrName], polyALength, snpRate, vcf), file=bed12FastaFile)

    totalLength = 0
    minFragmentLength = 150
    maxFragmentLength = 450
    with open(bed12, "w") as bed12File:
        for utr in BedIterator(bed):
            utrLength = utr.getLength()
            fragmentLength = random.randrange(minFragmentLength, maxFragmentLength)  # + readLength
            fragmentLength = min(fragmentLength, utrLength)

            # The fragment is anchored at the end of the UTR.
            start = max(0, utrLength - fragmentLength)
            end = utrLength  # - readLength
            totalLength += (end - start)

            # min(utr.getLength() + readLength / 4, fragmentLength + readLength / 4)
            print(utr.name, start, end, utr.name, utr.score, "+", start, end, "255,0,0", "1", (end - start), 0, sep="\t", file=bed12File)

    # NOTE(review): the command is assembled by string concatenation; paths
    # containing spaces or shell metacharacters would break or alter it.
    output = shell(getBinary("genexplvprofile.py") + " --geometric 1 " + bed12 + " 2> /dev/null > " + explv)
    if len(output.strip()) > 5:
        print(output)

    return totalLength
示例7: needle
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
def needle(chrom, start, end, name, score, strand):
    """Align one repeat interval against its family reference with EMBOSS needle.

    The strand-aware sequence of the interval is extracted from the
    module-level ``genome``. Depending on ``name`` it is aligned against the
    module-level reference ``MIRb``, ``MIRc``, ``MIR3``, ``MIR`` (exact name
    match) or ``ALU`` (any name containing ``"Alu"``).

    Returns
    -------
    int
        End offset of the first match of the module-level pattern ``char`` in
        the second row of the alignment, or ``0`` when ``name`` matches none
        of the known families.
    """
    n = 0
    item = BedTool([(chrom, start, end, name, score, strand)])
    item = item.sequence(fi=genome, s=True)
    # Second line of the FASTA output is the sequence itself.
    with open(item.seqfn) as fasta_handle:
        temp = fasta_handle.read().split('\n')[1]

    # Pick the reference lazily so that only the needed global is touched,
    # exactly as in the original per-branch code.
    if name == "MIRb":
        reference = MIRb
    elif name == "MIRc":
        reference = MIRc
    elif name == "MIR3":
        reference = MIR3
    elif name == "MIR":
        reference = MIR
    elif "Alu" in name:
        reference = ALU
    else:
        return n

    needle_cline = NeedleCommandline(asequence="asis:" + reference, bsequence="asis:" + temp, gapopen=10, gapextend=0.5, outfile='stdout')
    # NOTE(review): the command line is run through the shell on non-Windows
    # platforms; the sequences embedded in it come from the genome file.
    child = subprocess.Popen(str(needle_cline), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=(sys.platform != "win32"))
    try:
        # Consume stdout before waiting: wait() on a child with unread PIPEs
        # can block forever once the OS pipe buffer fills up.
        align = AlignIO.read(child.stdout, "emboss")
    finally:
        # Drains whatever is left on both pipes and reaps the process.
        child.communicate()

    n = char.search(str(align[1, :].seq)).end()
    return n
示例8: extract_fasta
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
def extract_fasta(self, bed_in, fasta_in, fasta_out=None):
    """Extract the fasta of bed regions and write it via ``make_uppercase_fasta``.

    Parameters
    ----------
    bed_in: string
        Path to input bed
    fasta_in: string
        Path to the reference fasta file (made absolute before use)
    fasta_out: string
        Path to write extracted fasta sequence. Required in practice: the
        ``None`` default is kept only for signature compatibility.

    Returns
    -------
    None
        The result is written to ``fasta_out``; nothing is returned.

    Raises
    ------
    TypeError
        If ``fasta_out`` is ``None``.
    """
    if fasta_out is None:
        # Fail before running bedtools; previously this surfaced only later,
        # inside os.path.abspath, leaving the temporary fasta behind.
        raise TypeError('fasta_out must be a path to the output fasta file, not None')

    bed = BedTool(bed_in)
    extracted_fasta = bed.sequence(fi=os.path.abspath(fasta_in))
    temp_fasta = extracted_fasta.seqfn
    try:
        make_uppercase_fasta(temp_fasta, os.path.abspath(fasta_out))
    finally:
        # Remove the intermediate file even when the conversion fails.
        os.remove(temp_fasta)
示例9: open
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
import sys
import argparse
from pybedtools import BedTool
# Command-line script: for every record of a BED file, extract its sequence
# from a fasta file and print the fraction of A, T, C and G bases.
parser = argparse.ArgumentParser(description='get fasta from bed files and count ATCG percentage')
parser.add_argument('-b', '--bed', required=True, help='input bed file')
parser.add_argument('-f', '--fasta', required=True, help='fasta file')
args = parser.parse_args()

with open(args.bed) as bedFile:
    fasta = BedTool(args.fasta)
    for line in bedFile:
        if not line.strip():
            # Blank lines carry no interval.
            continue
        bedline = BedTool(line, from_string=True)
        get_fasta = bedline.sequence(fi=fasta, split=True, s=True)
        with open(get_fasta.seqfn) as seq_handle:
            fasta_lines = seq_handle.read().split('\n')
        # Line 0 is the fasta header, line 1 the sequence (if any was extracted).
        seq = fasta_lines[1].upper() if len(fasta_lines) > 1 else ''
        seq_len = len(seq)
        if seq_len == 0:
            # Avoid ZeroDivisionError / IndexError for records without sequence.
            sys.stderr.write('skipping record without sequence: %s\n' % line.strip())
            continue
        fields = [line.split()[3]]
        fields.extend(float(seq.count(base)) / seq_len for base in 'ATCG')
        # ' \t' reproduces the separator emitted by the former Python 2
        # statement `print name,'\t',a,'\t',...` (a space before each tab,
        # none after it) while also running on Python 3.
        print(' \t'.join(str(field) for field in fields))
示例10: Bedfile
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import sequence [as 别名]
class Bedfile(object):
"""Class to create a bed file object
Parameters
----------
filepath: string
Absolute path to bedfile
genome_table: string
Absolute path to genome chromosome size file
"""
def __init__(self, filepath, genome_table):
    """Load, classify and sort the bed file, then wrap it in a BedTool.

    Parameters
    ----------
    filepath: string
        Path to the bed file; must exist as a regular file.
    genome_table: string
        Stored as ``self.genome_table`` (used as ``g`` by ``slop_bed``).

    Raises
    ------
    MocaException
        If ``filepath`` is not an existing file, or if reading it or
        guessing its format fails.
    """
    self.filepath = filepath
    self.bed_format = None
    if not os.path.isfile(filepath):
        raise MocaException('Bed file {} not found'.format(self.filepath))
    self._read()
    self.bed_format = self.guess_bedformat()
    self.sort_bed()
    self.bed = BedTool(filepath)
    self.genome_table = genome_table
    # The attribute is `bed_format`; the former `bed_Format` spelling raised
    # AttributeError on every construction.
    assert self.bed_format is not None
def _read(self):
    """Load ``self.filepath`` as a header-less table into ``self.bed_df``.

    Any failure while reading is re-raised as ``MocaException`` carrying the
    file path and the text of the original error.
    """
    try:
        table = pandas.read_table(self.filepath, header=None)
    except Exception as read_error:
        summary = 'Error reading bed file {}'.format(self.filepath)
        detail = 'Traceback: {}'.format(read_error)
        raise MocaException(summary, detail)
    self.bed_df = table
def guess_bedformat(self):
    """Method to guess bed format

    The format is looked up in ``__BED_TYPES__`` by the number of columns of
    ``self.bed_df``; the columns are also stored as ``self.bed_columns``.

    Returns
    -------
    bed_format: string
        BED format

    Raises
    ------
    MocaException
        If the column count is not a key of ``__BED_TYPES__``.

    Example:
    >>> bed_df = Bedfile('file.bed', 'genome.chrom.sizes')
    >>> print(bed_df.guess_bedformat())
    """
    self.bed_columns = self.bed_df.columns
    count = len(self.bed_columns)
    try:
        bed_format = __BED_TYPES__[count]
    except KeyError:
        # The placeholders were previously left unfilled, so the message
        # literally contained "{}".
        supported = ', '.join(str(key) for key in __BED_TYPES__)
        raise MocaException(
            'Bed file had {} columns. Supported column lengths are {}'.format(count, supported))
    return bed_format
def slop_bed(self, flank_length=5):
    """Add flanking sequences to bed file

    Parameters
    ----------
    flank_length: int
        the bed region is expanded in both direction by flank_length number of bases

    Returns
    -------
    slop_bed:
        The object returned by ``self.bed.slop`` (per the pybedtools
        documentation, a new BedTool with the expanded intervals).
        ``self.bed`` itself is not reassigned.
    """
    # The result used to be discarded, which made the call a no-op for the
    # caller; return it as the docstring always promised.
    return self.bed.slop(g=self.genome_table, b=flank_length)
def convert_to_scorefile(self):
"""Compute a ``peak_positions`` column for the bed records.

NOTE(review): the body reads ``filetype`` and ``df``, neither of which is
defined in this method; presumably they stand for ``self.bed_format`` and
``self.bed_df`` -- confirm before relying on this method. Nothing is
returned or stored on ``self`` in the visible code.

The export step below was left disabled inside this docstring by the
original author:

filename, file_extension = os.path.splitext(self.filepath)
filename += '.sorted'
self.bed_df.to_csv(filename+file_extension,
sep='\t',
columns=['chrom', 'peak_positions', 'score'],
index=False,
header=False)
"""
if filetype=='narrowPeak':
# Rows with peak == -1 get the integer midpoint of chromStart and chromEnd.
filter_df1 = df[df.peak.astype(int)==-1]
filter_df2 = df[df.peak.astype(int)!=-1]
filter_df1['peak_positions'] = (filter_df1['chromStart'].astype(int)+filter_df1['chromEnd'].astype(int))
filter_df1['peak_positions'] = [int(x/2) for x in filter_df1['peak_positions'].astype(int)]
# All other rows get chromStart + peak.
filter_df2['peak_positions'] = filter_df2['chromStart'].astype(int)+filter_df2['peak'].astype(int)
df = pandas.concat([filter_df1, filter_df2])
else:
# Any other file type: integer midpoint of chromStart and chromEnd.
df['peak_positions'] = (df['chromStart']+df['chromEnd'])
df['peak_positions'] = [int(x/2) for x in df['peak_positions'].astype(int)]
def extract_fasta(self, fasta_file):
"""Extract fasta of bed regions
Parameters
----------
fasta_file: string
Absolute path to location of fasta file
Returns
-------
fasta: string
Fasta sequence combined
"""
#.........这里部分代码省略.........