本文整理汇总了 Python 中 pybedtools.BedTool.seq 方法的典型用法代码示例。如果您正苦于以下问题:Python BedTool.seq 方法的具体用法?Python BedTool.seq 怎么用?Python BedTool.seq 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pybedtools.BedTool 的用法示例。
在下文中一共展示了BedTool.seq方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: sequence_from_bedfile
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import seq [as 别名]
def sequence_from_bedfile(fastafile, features=None, bedfile=None, pad5=0, pad3=0):
    """Fetch fasta sequences for a set of genomic features.

    Args:
        fastafile: fasta file with genomic sequence
        features: dataframe of features/coords with bed file col names
            (the chr, chromStart, chromEnd, strand and name columns are read)
        bedfile: optionally provide a bed file instead; when given it is
            loaded with utils.bed_to_dataframe and replaces ``features``
        pad5, pad3: flanking sequence to add at the 5' or 3' end
    Returns:
        a pandas dataframe with name, seq and coords columns
    Raises:
        ValueError: if neither ``features`` nor ``bedfile`` is provided
    """
    if bedfile is not None:
        features = utils.bed_to_dataframe(bedfile)
    if features is None:
        raise ValueError("either 'features' or 'bedfile' must be provided")

    # Imported lazily (as in the original) so argument validation does not
    # require pybedtools to be installed.
    from pybedtools import BedTool

    rows = []
    for _, r in features.iterrows():
        forward = r.strand == '+'
        # Any strand other than '+' is treated as the reverse strand, where
        # the 5' end is on the right, so the two pads swap sides.
        left_pad, right_pad = (pad5, pad3) if forward else (pad3, pad5)
        # Clamp at 0: a negative start is not a valid BED coordinate.
        start = max(0, r.chromStart - left_pad)
        coords = (r.chr, start, r.chromEnd + right_pad)
        seq = str(BedTool.seq(coords, fastafile))
        if not forward:
            seq = HTSeq.Sequence(seq).get_reverse_complement()
        rows.append([r['name'], str(seq), coords])
    return pd.DataFrame(rows, columns=['name', 'seq', 'coords'])
示例2: CpG_type
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import seq [as 别名]
def CpG_type(chrom, beg, end, in_fa, M_length):
    """Classify a genomic region as "HCP", "ICP" or "LCP" by CpG content.

    The region sequence is scanned with a 500 bp window moved in 5 bp steps.
    For each window the GC fraction ((C+G)/500) and the observed/expected CpG
    ratio (500*CpG/(C*G)) are computed.

    Args:
        chrom, beg, end: region coordinates; first passed through check_pos
            together with M_length (check_pos is defined elsewhere --
            presumably it clamps the coordinates; verify against its source)
        in_fa: fasta file handed to BedTool.seq
        M_length: forwarded unchanged to check_pos
    Returns:
        "HCP" if any window has ratio >= 0.75 and GC fraction >= 0.55;
        otherwise "LCP" if the highest ratio seen is < 0.48;
        otherwise "ICP". Sequences shorter than one window yield "LCP".
    """
    window = 500
    step = 5

    beg, end = check_pos(chrom, beg, end, M_length)
    seq = BedTool.seq((chrom, beg, end), in_fa).upper()

    max_ratio = 0.0
    # The stop is len - window + 1 so the last full window (i + window ==
    # len(seq)) is scanned too; the range is empty for short sequences.
    for i in range(0, len(seq) - window + 1, step):
        tmp = seq[i:i + window]
        c_count = tmp.count("C")
        g_count = tmp.count("G")
        cpg_count = tmp.count("CG")
        # float() keeps true division even under Python 2 semantics.
        gc_fraction = (c_count + g_count) / float(window)
        if c_count and g_count:
            ratio = float(window) * cpg_count / (c_count * g_count)
        else:
            ratio = 0.0
        if ratio >= 0.75 and gc_fraction >= 0.55:
            # Return immediately so the LCP fallback below can never
            # overwrite an HCP call.
            return "HCP"
        if ratio > max_ratio:
            max_ratio = ratio

    return "LCP" if max_ratio < 0.48 else "ICP"
示例3: process_file
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import seq [as 别名]
def process_file(args):
    """Trim leading G bases from the 5' end of every interval and write it out.

    For each interval in args.infile[0] the sequence is fetched from
    args.fasta_file[0] (reverse-complemented for '-' strand intervals) and
    the run of leading 'G'/'g' characters is counted. '+' intervals have
    their start moved right by that count, '-' intervals have their end
    moved left; intervals with any other strand are written unchanged.
    Each interval is written to args.outfile via str().
    """
    for feature in BedTool(args.infile[0]):
        bases = BedTool.seq(
            (feature.chrom, feature.start, feature.end), args.fasta_file[0]
        )
        strand = feature.strand
        if strand == '-':
            bases = str(Seq(bases).reverse_complement())

        # Length of the leading run of G's, case-insensitively.
        leading_gs = len(bases) - len(bases.lstrip('Gg'))

        if strand == '+':
            feature.start += leading_gs
        elif strand == '-':
            feature.end -= leading_gs
        args.outfile.write(str(feature))
示例4: int
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import seq [as 别名]
pair_key = columns[15] + ' ' + columns[16]
# Filtering variants in TCGA
# 1) SNPs
# 2) This sample comparison exists in GDC
if columns[9] == "SNP" and pair_key in gdc_pairs:
start = lo.convert_coordinate('chr' + columns[4], int(columns[5]))
end = lo.convert_coordinate('chr' + columns[4], int(columns[6]))
total_variants += 1
# Check if reference has been correctly crossed
if start is not None and end is not None and len(start)==1 and len(end)==1:
refbase = BedTool.seq(start[0][0].replace('chr','') + ':' + str(start[0][1]) + '-' + str(end[0][1]), fastaRef)
# Check if reference in TCGA is the same in hg38 ref
if refbase == columns[10]:
variant_key = ' '.join([start[0][0], str(start[0][1]), str(end[0][1]), start[0][2], columns[15], columns[16]])
# Create pair if it is not created
if pair_key in pair_list:
pair_list[pair_key][4] += 1
else:
pair_list[pair_key] = [0] * nfiles_gdc + [1]
# Check if this is a TP in all gdc files
for i in range(0,nfiles_gdc):
if variant_key in gdc_var_files_list[i]: