本文整理汇总了Python中pysam.asTuple函数的典型用法代码示例。如果您正苦于以下问题:Python asTuple函数的具体用法?Python asTuple怎么用?Python asTuple使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了asTuple函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_tabix_multi_ps_open
def test_tabix_multi_ps_open(self):
    """Two tabix iterators over the same file, fed through one MockReader,
    must reproduce the reference BED lines twice over."""
    with open(self.tabix_ref, "rb") as handle_a, open(self.tabix_ref, "rb") as handle_b:
        iter_a = pysam.tabix_file_iterator(handle_a, pysam.asTuple())
        iter_b = pysam.tabix_file_iterator(handle_b, pysam.asTuple())
        reader = MockReader(iter_a, iter_b, self.tabix_ref, tabix=True)
        reference = self.bed_lines + self.bed_lines
        for produced, wanted in zip(reader, reference):
            self.assertEqual(produced.strip("\n"), wanted.strip("\n"))
示例2: parse_annotations
def parse_annotations(chrom, pos):
    """Look up the AF_supAFR and CSQ INFO values for a variant.

    chrom/pos are 1-based VCF coordinates; chromosome X is routed to the
    PAR1/PAR2/nonPAR region files (boundaries look like GRCh37 PAR limits
    -- TODO confirm against the pipeline's reference build).

    Returns (AF_supAFR, CSQ), each 'NA' when the position or key is absent.
    """
    AF_supAFR = CSQ = 'NA'
    if chrom == 'X':
        if pos <= 2699520:
            replace = 'X_PAR1'
        elif pos >= 154931044:
            replace = 'X_PAR2'
        else:
            replace = 'X_nonPAR'
    else:
        replace = chrom
    path_vcf = '../../../SHAPEIT/out_annotate_2016Dec28/{}.minAC1.no_mask.without_related.vcf.gz'.format(replace)
    tbx = pysam.TabixFile(path_vcf)
    try:
        for row in tbx.fetch(chrom, pos - 1, pos, parser=pysam.asTuple()):
            for field in row[7].split(';'):
                # INFO flag entries (e.g. 'DB') carry no '='; the original
                # special-cased 'DB' only and crashed on any other flag.
                if '=' not in field:
                    continue
                # maxsplit=1: values (notably CSQ) may themselves contain '='.
                k, v = field.split('=', 1)
                if k == 'AF_supAFR':
                    AF_supAFR = v
                elif k == 'CSQ':
                    CSQ = v
    finally:
        # Original leaked the tabix handle; close it deterministically.
        tbx.close()
    return AF_supAFR, CSQ
示例3: load_segmented_data
def load_segmented_data(filepath, interval):
    """Load BED-like records overlapping `interval` from a tabix-indexed file.

    Columns beyond chrom/start/end are optional: name defaults to '.',
    score to -inf, strand to '+'.

    Returns a genomic_interval_set of the parsed records.
    """
    res = genomic_interval_set()
    tabix = pysam.TabixFile(filepath)
    try:
        for row in tabix.fetch(interval.chrom, interval.start, interval.end,
                               parser=pysam.asTuple()):
            chrom = row[0]
            start = int(row[1])
            end = int(row[2])
            # Narrow exceptions: the original bare `except:` would also have
            # swallowed KeyboardInterrupt and genuine bugs.
            try:
                name = row[3]
            except IndexError:
                name = '.'
            try:
                score = float(row[4])
            except (IndexError, ValueError):
                score = -np.inf
            try:
                strand = row[5]
            except IndexError:
                strand = '+'
            res += genomic_interval(chrom, start, end,
                                    name=name, score=score, strand=strand)
    finally:
        # Close even if a record fails to parse (original leaked on error).
        tabix.close()
    return res
示例4: annotate_variants_list
def annotate_variants_list(args, select_cursor, update_cursor):
    """
    Populate a new, user-defined column in the variants
    table with the comma-joined values extracted (column
    args.col_extract, 1-based) from annotation-file records
    overlapping each variant; NULL when there is no overlap.
    """
    add_requested_column(args.col_name, update_cursor)
    # For each variant, use Tabix to collect overlaps with the user-defined
    # annotation file, then update the variant row.
    annos = pysam.Tabixfile(args.anno_file)
    # NOTE(review): args.col_name is interpolated as an SQL identifier and
    # cannot be bound as a parameter; assumed validated by
    # add_requested_column upstream -- confirm.
    update_qry = ("UPDATE variants SET " + args.col_name +
                  " = ? WHERE variant_id = ?")
    select_cursor.execute("SELECT chrom, start, end, variant_id FROM variants")
    for row in select_cursor:
        hit_list = []
        for hit in annos.fetch(str(row['chrom']), int(row['start']), int(row['end']),
                               parser=pysam.asTuple()):
            try:
                hit_list.append(hit[int(args.col_extract) - 1])
            except IndexError:
                sys.exit("Column " + args.col_extract + " exceeds \
                the number of columns in your \
                annotation file. Exiting.")
        # Bind values instead of concatenating them into the SQL text: the
        # original broke (and was injectable) when an annotation value
        # contained a quote character.
        hits = ",".join(hit_list) if hit_list else None
        update_cursor.execute(update_qry, (hits, row['variant_id']))
示例5: testTabixIndexedTsvCreation
def testTabixIndexedTsvCreation(self):
    """Index a TSV on (chr, startPos, endPos) and verify a known record is fetchable."""
    inFile = "testdata/ESP6500SI-V2.chr1.snps_indels.head.25.txt"
    destDir = "out"
    # chr, startPos, endPos
    resultIndexedFile = TabixIndexer.index(destDir=destDir, inputFilename=inFile, fileColumnNumList=[0, 1, 1])
    self.assertTrue(os.path.exists(resultIndexedFile), "No index file was generated.")
    chrom = "1"
    start = "69594"
    end = "69594"
    tsvRecords = None
    tsvReader = pysam.Tabixfile(filename=resultIndexedFile)  # initialize the tsv reader
    try:
        try:
            tsvRecords = tsvReader.fetch(chrom, int(start) - 1, int(end), parser=pysam.asTuple())
        except ValueError:
            pass
        tsvRecord = None
        # Bug fix: when fetch raised ValueError the original still iterated
        # tsvRecords (None), raising TypeError instead of failing the
        # assertIsNotNone check cleanly.
        if tsvRecords is not None:
            for tsvRecord in tsvRecords:
                self.assertEqual(tsvRecord[5], "2,6190", "Value in column sixth does not match the expected value.")
        self.assertIsNotNone(tsvRecord, "No record for %s:%s-%s was found." % (chrom, start, end))
    finally:
        # Close the reader (original leaked it) and remove the index even on failure.
        tsvReader.close()
        os.remove(resultIndexedFile)
示例6: annotate
def annotate(self, bedline, genome):
    """Return the unique annotation names (column 4) overlapping a BED line.

    bedline: tab-delimited string whose first three fields are chrom/start/end;
    genome: assembly name, must match self.genome.

    Raises LookupError on a malformed line, a non-'chr' chromosome name, or
    a genome mismatch. Returns [] when the contig is absent from the index.
    """
    fields = bedline.rstrip().rsplit("\t")
    chrom = fields[0]
    start = fields[1]
    end = fields[2]
    # The original followed every `raise` with an unreachable `return []`;
    # those dead statements are removed.
    if not re.search('chr', chrom):
        raise LookupError("chromosome names must start with chr: " + chrom)
    if (self.genome != genome):
        raise LookupError("tried to compare a %s bedfile to a %s annotation." % (genome, self.genome))
    if not (chrom and start and end):
        raise LookupError("can't find chr,start,end. File must be tab-delimited")
    # `in` instead of dict.has_key (removed in Python 3).
    if chrom not in self.tabixContigs:
        return []
    annotations = []
    tabixTupleParse = self.tabix.fetch(reference=chrom,
                                       start=int(start),
                                       end=int(end),
                                       parser=pysam.asTuple())
    for tabixTuple in tabixTupleParse:
        annotations.append(tabixTuple[3])
    return uniqann(annotations)
示例7: testIteratorUncompressed
def testIteratorUncompressed(self):
    '''test iteration from uncompressed file.'''
    tmpfilename = 'tmp_testIteratorUncompressed'
    # Decompress the fixture into a temporary plain-text copy.
    with gzip.open(self.filename, "rb") as src:
        with open(tmpfilename, "wb") as dst:
            dst.write(src.read())
    with open(tmpfilename) as infile:
        for x, record in enumerate(pysam.tabix_iterator(
                infile, pysam.asTuple())):
            expected = self.compare[x]
            self.assertEqual(expected, list(record))
            self.assertEqual(len(expected), len(record))
            # element-wise indexing
            for col in range(len(record)):
                self.assertEqual(expected[col], record[col])
            # slicing access
            for lo in range(len(record) - 1):
                for hi in range(lo + 1, len(record)):
                    self.assertEqual(expected[lo:hi],
                                     record[lo:hi])
    os.unlink(tmpfilename)
示例8: get_snp_data
def get_snp_data(*args, **kwargs):
    '''
    proxy for TabixFile.fetch
    '''
    # Force per-call iterators so concurrent fetches don't share state.
    kwargs['multiple_iterators'] = True
    snp_tabix = TabixFile(SNP_FILE, parser=asTuple())
    return snp_tabix.fetch(*args, **kwargs)
示例9: testTuple
def testTuple(self):
    """Each fetched tuple must match the reference row as a whole,
    in length, and element by element."""
    for idx, record in enumerate(self.tabix.fetch(parser=pysam.asTuple())):
        reference = self.compare[idx]
        self.assertEqual(reference, list(record))
        self.assertEqual(len(reference), len(record))
        for col, expected in enumerate(reference):
            self.assertEqual(expected, record[col])
示例10: testCopy
def testCopy(self):
    """copy.copy of a parsed row (tuple and GTF parsers) compares equal to the original."""
    # next(iterator) instead of iterator.next(): the method form is
    # Python-2-only; the builtin works on both 2.6+ and 3.
    a = next(self.tabix.fetch(parser=pysam.asTuple()))
    b = copy.copy(a)
    self.assertEqual(a, b)
    a = next(self.tabix.fetch(parser=pysam.asGTF()))
    b = copy.copy(a)
    self.assertEqual(a, b)
示例11: testUnset
def testUnset(self):
    """Setting a proxy-row column to None clears it: list(row) shows None
    while str(row) renders that column as an empty field."""
    for idx, row in enumerate(self.tabix.fetch(parser=pysam.asTuple())):
        self.assertEqual(self.compare[idx], list(row))
        want_list = list(row)
        want_str = list(row)
        for col in range(len(row)):
            row[col] = None
            want_list[col] = None
            want_str[col] = ""
            self.assertEqual(want_list, list(row))
            self.assertEqual("\t".join(want_str), str(row))
示例12: get_gapped_wnds
def get_gapped_wnds(self, chr, tbx_gaps):
    """Map the gap intervals on chromosome `chr` to window-index ranges.

    Returns a list of (wnd_start, wnd_end) tuples of indices into
    self.starts (numeric window starts, per the sibling
    get_overlapping_wnds which searchsorts int coordinates).
    """
    gapped_wnds = []
    for rec in tbx_gaps.fetch(chr, parser=pysam.asTuple()):
        _chrom, start, end = rec
        # Tabix tuple fields are strings; cast before searchsorted --
        # comparing str keys against a numeric array is wrong.
        wnd_start = np.searchsorted(self.starts, int(start))
        wnd_end = np.searchsorted(self.starts, int(end))
        gapped_wnds.append((wnd_start, wnd_end))
    return gapped_wnds
示例13: get_ref_alt_from_dbSNP
def get_ref_alt_from_dbSNP(chrom, pos, path_vcf):
    """Return (REF, ALT) of the single-nucleotide dbSNP record at 1-based `pos`.

    Only records with a 1-bp REF and at least one 1-bp ALT allele qualify;
    the chosen record must have exactly one ALT (asserted).

    Raises ValueError when no qualifying record exists.
    """
    tbx = pysam.TabixFile(path_vcf)
    try:
        for row in tbx.fetch(chrom, pos - 1, pos, parser=pysam.asTuple()):
            if len(row[3]) == 1 and 1 in map(len, row[4].split(',')):
                break
        else:
            # Original aborted via a deliberate NameError on the undefined
            # name `stop_not_found_in_dbSNP`; raise a meaningful error instead.
            raise ValueError('%s:%d not found in dbSNP' % (chrom, pos))
    finally:
        # Close the handle (original leaked it).
        tbx.close()
    assert ',' not in row[4], row
    return row[3], row[4]
示例14: get_overlapping_wnds
def get_overlapping_wnds(self, chr, tbx):
    """Return an (n, 2) array of window-index pairs for the tabix records on `chr`.

    Column 0/1 are the insertion points of each record's start/end
    coordinate into this chromosome's window starts.
    """
    wnd_starts, _wnd_ends = self.get_wnds_by_chr(chr)
    bnds = np.array([[int(l[1]), int(l[2])]
                     for l in tbx.fetch(chr, parser=pysam.asTuple())])
    # An empty fetch yields a 1-D empty array, so bnds[:, 0] would raise
    # IndexError in the original; return an empty (0, 2) result instead.
    if bnds.size == 0:
        return np.empty((0, 2), dtype=int)
    start_idxs = np.searchsorted(wnd_starts, bnds[:, 0])
    end_idxs = np.searchsorted(wnd_starts, bnds[:, 1])
    return np.c_[start_idxs, end_idxs]
示例15: __init__
def __init__(self, task_queue, results_queue, family, args):
    """Worker process for annotating one family's variants.

    task_queue/results_queue carry work in and results out; `args` supplies
    verbosity, phasing mode, the chr prefix, and an optional CADD file.
    """
    multiprocessing.Process.__init__(self)
    self.task_queue = task_queue
    self.family = family
    self.results_queue = results_queue
    self.verbosity = args.verbose
    self.phased = args.phased
    self.chr_prefix = args.chr_prefix
    # CADD scores are optional: open a tabix handle only when a path is given,
    # otherwise keep whatever falsy value was supplied.
    cadd_path = args.cadd_file[0]
    self.cadd_file = Tabixfile(cadd_path, parser=asTuple()) if cadd_path else cadd_path