本文整理汇总了Python中vcf.Reader类的典型用法代码示例。如果您正苦于以下问题:Python vcf.Reader的具体用法?Python vcf.Reader怎么用?Python vcf.Reader使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块vcf的用法示例。
在下文中一共展示了vcf.Reader方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: load_snvs_consensus
# 需要导入模块: import vcf [as 别名]
# 或者: from vcf import Reader [as 别名]
def load_snvs_consensus(snvs):
    """Load per-record read counts from a VCF file into a DataFrame.

    For every record the ``t_ref_count`` and ``t_alt_count`` INFO fields are
    read. Records whose variant read count is zero are skipped; records
    missing either field are reported with a warning on stdout and skipped.

    Parameters
    ----------
    snvs : str
        Path handed to ``vcf.Reader`` as ``filename``.

    Returns
    -------
    pandas.DataFrame
        One row per retained record with columns ``chrom``, ``pos``,
        ``gtype`` (always an empty string here), ``ref`` and ``var``.
    """
    vcf_reader = vcf.Reader(filename=snvs)
    snv_dtype = [('chrom', '<U50'), ('pos', int), ('gtype', '<U50'), ('ref', float), ('var', float)]

    # Collect plain tuples and build the structured array once at the end;
    # calling np.append per record re-copies the whole array every time.
    rows = []
    for record in vcf_reader:
        try:
            ref_reads = record.INFO['t_ref_count']
            variant_reads = record.INFO['t_alt_count']
        except KeyError:
            print('WARNING: missing count field(s) in record %s:%d' % (record.CHROM, record.POS))
            continue
        if variant_reads != 0:
            rows.append((record.CHROM, record.POS, '', ref_reads, variant_reads))

    snv_df = np.array(rows, dtype=snv_dtype)
    return pd.DataFrame(snv_df)
示例2: countrecs
# 需要导入模块: import vcf [as 别名]
# 或者: from vcf import Reader [as 别名]
def countrecs(submission, truth, vtype='SNV', ignorechroms=None, truthmask=True):
    """Count the records of type ``vtype`` in the submission VCF.

    A record is counted when it passes ``passfilter``, its chromosome is not
    listed in ``ignorechroms`` (if given), it is not masked by ``mask`` and
    its type flag (``is_snp`` / ``is_sv`` / ``is_indel``) matches ``vtype``.
    """
    assert vtype in ('SNV', 'SV', 'INDEL')

    sub_reader = vcf.Reader(filename=submission)
    truth_reader = vcf.Reader(filename=truth)

    # Chromosomes seen in the truth file, stored as a lookup table.
    truth_chroms = {truth_rec.CHROM: True for truth_rec in truth_reader}

    count = 0
    for rec in sub_reader:
        if not passfilter(rec):
            continue
        if ignorechroms is not None and rec.CHROM in ignorechroms:
            continue
        if mask(rec, truth_reader, truth_chroms, active=truthmask):
            continue

        # The three type checks are independent of one another.
        if rec.is_snp and vtype == 'SNV':
            count += 1
        if rec.is_sv and vtype == 'SV':
            count += 1
        if rec.is_indel and vtype == 'INDEL':
            count += 1

    return count
示例3: _readVcf
# 需要导入模块: import vcf [as 别名]
# 或者: from vcf import Reader [as 别名]
def _readVcf(self, vcfFileName):
    """
    Load the header metadata and every variant record of the given VCF
    file into this object's attributes.
    """
    reader = vcf.Reader(filename=vcfFileName)
    header = reader.metadata

    self._vcfVersion = header["fileformat"]
    self._infos = reader.infos
    self._formats = reader.formats
    self.vcfSamples = reader.samples
    # True when the header metadata has a "VEP" entry.
    self._isVEP = "VEP" in header

    for variant in reader:
        self._referenceNames.add(variant.CHROM)
        info = variant.INFO
        # An explicit END info tag overrides the record's own end value.
        if "END" in info:
            variant.end = info["END"]
        self._variantRecords.append(variant)
示例4: _readVcf
# 需要导入模块: import vcf [as 别名]
# 或者: from vcf import Reader [as 别名]
def _readVcf(self, vcfFileName):
    """
    Parse the VCF file at ``vcfFileName``, keeping its header sections
    (version, infos, formats, filters, samples) and all of its variant
    records on this object.
    """
    source = vcf.Reader(filename=vcfFileName)

    # Header-level information.
    headerMetadata = source.metadata
    self._vcfVersion = headerMetadata["fileformat"]
    self._infos = source.infos
    self._formats = source.formats
    self._filters = source.filters
    self.vcfSamples = source.samples

    # Record-level information.
    for entry in source:
        self._reference_names.add(entry.CHROM)
        if "END" in entry.INFO:
            # The END info tag, when present, takes precedence.
            entry.end = entry.INFO["END"]
        self._variantRecords.append(entry)
示例5: readMergedCalls
# 需要导入模块: import vcf [as 别名]
# 或者: from vcf import Reader [as 别名]
def readMergedCalls(infile, filterByChromosome=True, readINFO=False, skipcallers=None):
    """Read a merged callset and return a 3-tuple:

    - dict mapping caller name -> caller index
    - callsets (list of lists): for each caller index, the call indices it made
    - calls: list indexed by call index of
      ``(n_callers, chrom, pos, ref, first_alt, comma-joined caller names)``

    Records are skipped when ``filterByChromosome`` is set and
    ``mapped_to_chromosome`` rejects the chromosome, or when no caller
    remains after removing those listed in ``skipcallers``.
    ``readINFO`` is accepted but not used by this function.
    """
    if skipcallers is None:
        skipcallers = []

    reader = vcf.Reader(infile)
    caller_index = {}
    callsets = []
    calls = []
    next_call = 0

    for record in reader:
        if filterByChromosome and not mapped_to_chromosome(record.CHROM):
            continue

        # Distinct, non-skipped callers of this record in first-seen order.
        seen = []
        for name in record.INFO['Callers']:
            if name in skipcallers or name in seen:
                continue
            seen.append(name)
            if name not in caller_index:
                # A new caller gets the next free index and an empty callset.
                caller_index[name] = len(caller_index)
                callsets.append([])
            callsets[caller_index[name]].append(next_call)

        if not seen:
            continue

        calls.append((len(seen), record.CHROM, record.POS,
                      str(record.REF), str(record.ALT[0]), ",".join(seen)))
        next_call += 1

    return caller_index, callsets, calls
示例6: vcftobkpts
# 需要导入模块: import vcf [as 别名]
# 或者: from vcf import Reader [as 别名]
def vcftobkpts(infile, outfile, width):
    """Write a window around every breakpoint found in a VCF.

    Breakpoint pairs are extracted with ``breakpointsFromRecord`` from each
    record whose FILTER is "PASS", ".", ``None`` or an empty list. The first
    and second breakpoint of every pair are collected separately, the number
    of breakpoints present in only one collection is reported on stderr, and
    one ``chrom start end`` line per breakpoint is printed to ``outfile``.

    Parameters
    ----------
    infile
        Passed to ``vcf.Reader`` as its first positional argument.
    outfile
        Writable file object receiving the output lines.
    width
        Window width; each line spans ``width // 2`` on either side of the
        breakpoint position, with the start clamped at 0.
    """
    firstbkpts = loc.locationdict(width)
    pairbkpts = loc.locationdict(width)

    reader = vcf.Reader(infile)
    for record in reader:
        if record.FILTER == "PASS" or record.FILTER == "." or record.FILTER is None or (type(record.FILTER) is list and len(record.FILTER) == 0):
            bkptPairs = breakpointsFromRecord(record)
            for pair in bkptPairs:
                addBkptToDictDict(pair[0], firstbkpts)
                addBkptToDictDict(pair[1], pairbkpts)

    # count how many breakpoints weren't in both dicts, for diagnostics;
    # then add everything to first dict for outputting
    nunmatched = 0
    for bp in firstbkpts:
        if not bkptInDictDict(bp, pairbkpts):
            nunmatched += 1

    for bp in pairbkpts:
        if not bkptInDictDict(bp, firstbkpts):
            nunmatched += 1
        addBkptToDictDict(bp, firstbkpts)

    print("#Num breakpoints not in both lists:",nunmatched,file=sys.stderr)

    # now output everything
    # Floor division keeps the coordinates integral; with "/" Python 3 would
    # print floats such as "75.0".
    half = width // 2
    for bp in firstbkpts:
        chrom, pos, strand, extendsRight = bp.asTuple()
        start = pos - half
        if start < 0:
            start = 0
        print("{0} {1} {2}".format(chrom, start, pos + half), file=outfile)
示例7: __init__
# 需要导入模块: import vcf [as 别名]
# 或者: from vcf import Reader [as 别名]
def __init__(self, vcf_in, filters=None, reference=None):
    """Constructor of variant set.

    Parameters
    ----------
    vcf_in: str
        Path to the VCF file for loading information.
    filters: str or dict or list, optional
        Dictionary or string of the filter:threshold key value pairs, or
        a list that is stored as the filters unchanged. When omitted, the
        filters are rebuilt from the FILTER entries declared in the
        header of *vcf_in*.
    reference: optional
        Passed unchanged to ``self._read_reference``.
    """
    self.vcf_in = vcf_in
    self._reader = vcf.Reader(filename=vcf_in)
    self.out_template = VCFTemplate(self._reader)

    self.filters = []
    if filters is not None:
        if isinstance(filters, str):
            self.filters = str_to_filters(filters)
        elif isinstance(filters, dict):
            self.filters = make_filters(config=filters)
        elif isinstance(filters, list):
            self.filters = filters
        else:
            # logging.warn is a deprecated alias that newer Python
            # versions no longer provide; logging.warning is equivalent.
            logging.warning("Could not create filters from %s", filters)
    else:
        # No filters given: decode them from the VCF header instead.
        reader = vcf.Reader(filename=self.vcf_in)
        filters = {}
        for filter_id in reader.filters:
            filters.update(PHEFilterBase.decode(filter_id))

        if filters:
            self.filters = make_filters(config=filters)

    self._variants = []

    self._read_reference(reference)
示例8: __init__
# 需要导入模块: import vcf [as 别名]
# 或者: from vcf import Reader [as 别名]
def __init__(self, vcfs):
    """Instantiate ParallelVCFReader.

    Parameters
    ----------
    vcfs: list
        List of paths to the VCF files to read.
    """
    # One reader per input path, keyed by that path.
    readers = {}
    for path in vcfs:
        readers[path] = vcf.Reader(filename=path)

    self._readers = readers
    self._records = {}
    self.update()
示例9: annotate
# 需要导入模块: import vcf [as 别名]
# 或者: from vcf import Reader [as 别名]
def annotate(self, vcf_path=None):
    """Compute the mean and standard deviation of the DP INFO field.

    Every record of the VCF at ``vcf_path`` contributes its ``DP`` value
    (0 when the field is absent) to ``self.mean`` and ``self._mean_sqr``,
    which are then divided by the record count. ``self.dev`` is set to the
    square root of ``mean_sqr - mean ** 2``.

    If the file holds no records the accumulators are left untouched and
    ``self.dev`` is set to 0.0 instead of dividing by zero.
    """
    reader = vcf.Reader(filename=vcf_path)

    total = 0
    for record in reader:
        depth = record.INFO.get("DP", 0)
        self.mean += depth
        self._mean_sqr += depth ** 2
        total += 1

    if total == 0:
        # Nothing to average over; avoid ZeroDivisionError.
        self.dev = 0.0
        return

    self.mean = self.mean * 1.0 / total
    self._mean_sqr = self._mean_sqr * 1.0 / total

    # Rounding can push the variance marginally below zero when all depths
    # are (nearly) equal; clamp so math.sqrt does not raise ValueError.
    variance = self._mean_sqr - self.mean ** 2
    self.dev = math.sqrt(max(variance, 0.0))
示例10: test_add_metadata
# 需要导入模块: import vcf [as 别名]
# 或者: from vcf import Reader [as 别名]
def test_add_metadata(self):
    """Metadata added to the variant set must appear in the written VCF."""
    self.var_set.add_metadata({"key": [{"value": "description"}]})
    self.var_set.write_variants(self.vcf_out)

    vcf_reader = vcf.Reader(filename=self.vcf_out)

    # Same subset check as assertDictContainsSubset, which is deprecated
    # and missing from newer unittest versions.
    expected = {"key": [{"value": "description"}]}
    for key, value in expected.items():
        self.assertIn(key, vcf_reader.metadata)
        self.assertEqual(value, vcf_reader.metadata[key])
示例11: test_call
# 需要导入模块: import vcf [as 别名]
# 或者: from vcf import Reader [as 别名]
def test_call(self):
    """Run ``self.filter`` over every record and compare the positions.

    A ``None`` result marks the position as good. Any other result marks
    it as bad, and a result that is exactly ``False`` additionally marks
    it as N/A. The per-chromosome position lists are sorted and compared
    with the expected ``self.bad_positions``, ``self.good_positions`` and
    ``self.na_positions``.
    """
    reader = vcf.Reader(filename=self.vcf_in)

    bad_positions = defaultdict(list)
    good_positions = defaultdict(list)
    na_positions = defaultdict(list)

    for record in reader:
        result = self.filter(record)

        if result is None:
            good_positions[record.CHROM].append(record.POS)
            continue
        elif result is False:
            na_positions[record.CHROM].append(record.POS)

        bad_positions[record.CHROM].append(record.POS)

    # .values() exists on both Python 2 and 3; .itervalues() is Python 2 only.
    for v in bad_positions.values():
        v.sort()
    for v in good_positions.values():
        v.sort()
    for v in na_positions.values():
        v.sort()

    self.assertDictEqual(self.bad_positions, bad_positions)
    self.assertDictEqual(self.good_positions, good_positions)
    self.assertDictEqual(self.na_positions, na_positions)
示例12: test_call
# 需要导入模块: import vcf [as 别名]
# 或者: from vcf import Reader [as 别名]
def test_call(self):
    """Classify every record with ``self.filter`` and verify the outcome.

    Positions are grouped per chromosome: good when the filter returns
    ``None``, bad otherwise, and also N/A when it returns exactly
    ``False``. The sorted groups must equal the expected dictionaries
    stored on the test case.
    """
    reader = vcf.Reader(filename=self.vcf_in)

    bad_positions = defaultdict(list)
    good_positions = defaultdict(list)
    na_positions = defaultdict(list)

    for record in reader:
        result = self.filter(record)

        if result is None:
            good_positions[record.CHROM].append(record.POS)
            continue
        elif result is False:
            na_positions[record.CHROM].append(record.POS)

        bad_positions[record.CHROM].append(record.POS)

    # dict.itervalues() is unavailable on Python 3; values() works on both.
    for collected in (bad_positions, good_positions, na_positions):
        for positions in collected.values():
            positions.sort()

    self.assertDictEqual(self.bad_positions, bad_positions)
    self.assertDictEqual(self.good_positions, good_positions)
    self.assertDictEqual(self.na_positions, na_positions)
示例13: test_call
# 需要导入模块: import vcf [as 别名]
# 或者: from vcf import Reader [as 别名]
def test_call(self):
    """Check the filter's verdict for each record of ``self.vcf_in``.

    ``self.filter`` returning ``None`` records a good position; any other
    value records a bad position, and exactly ``False`` records an N/A
    position as well. Sorted per-chromosome lists are compared against
    the expected good, bad and N/A dictionaries, in that order.
    """
    reader = vcf.Reader(filename=self.vcf_in)

    good_positions = defaultdict(list)
    bad_positions = defaultdict(list)
    na_positions = defaultdict(list)

    for record in reader:
        result = self.filter(record)

        if result is None:
            good_positions[record.CHROM].append(record.POS)
            continue
        elif result is False:
            na_positions[record.CHROM].append(record.POS)

        bad_positions[record.CHROM].append(record.POS)

    # values() instead of the Python-2-only itervalues().
    for v in bad_positions.values():
        v.sort()
    for v in good_positions.values():
        v.sort()
    for v in na_positions.values():
        v.sort()

    self.assertDictEqual(self.good_positions, good_positions)
    self.assertDictEqual(self.bad_positions, bad_positions)
    self.assertDictEqual(self.na_positions, na_positions)
示例14: test_call
# 需要导入模块: import vcf [as 别名]
# 或者: from vcf import Reader [as 别名]
def test_call(self):
    """Apply ``self.filter`` to all records and assert the expected split.

    Records for which the filter returns ``None`` are good; all others
    are bad, and those returning exactly ``False`` are also N/A. Position
    lists are sorted per chromosome before being compared with the
    expected bad, N/A and good dictionaries, in that order.
    """
    reader = vcf.Reader(filename=self.vcf_in)

    bad_positions = defaultdict(list)
    na_positions = defaultdict(list)
    good_positions = defaultdict(list)

    for record in reader:
        result = self.filter(record)

        if result is None:
            good_positions[record.CHROM].append(record.POS)
            continue
        elif result is False:
            na_positions[record.CHROM].append(record.POS)

        bad_positions[record.CHROM].append(record.POS)

    # itervalues() only exists on Python 2 dicts; values() is portable.
    for collected in (bad_positions, good_positions, na_positions):
        for positions in collected.values():
            positions.sort()

    self.assertDictEqual(self.bad_positions, bad_positions)
    self.assertDictEqual(self.na_positions, na_positions)
    self.assertDictEqual(self.good_positions, good_positions)
示例15: check_variants
# 需要导入模块: import vcf [as 别名]
# 或者: from vcf import Reader [as 别名]
def check_variants(self, vcf_file):
    """Compare the records of ``vcf_file`` with the expected positions.

    Unfiltered SNP records have their position removed from
    ``self.good_vars``; filtered records have theirs removed from
    ``self.failed_vars``. Afterwards the number of positions left in each
    collection must not exceed ``self.good_threshold`` and
    ``self.failed_threshold`` respectively.
    """
    reader = vcf.Reader(filename=vcf_file)

    missing_snps = {}
    # NOTE(review): missing_bad is filled below but never asserted on.
    missing_bad = {}

    for r in reader:
        if not r.FILTER:
            try:
                if r.is_snp:
                    self.good_vars.remove(r.POS)
            except KeyError:
                # NOTE(review): r.FILTER is falsy in this branch, so this
                # loop cannot add anything to missing_snps (and would
                # raise TypeError if FILTER is None) - confirm intent.
                for f in r.FILTER:
                    missing_snps[f] = missing_snps.get(f, 0) + 1
        else:
            try:
                self.failed_vars.remove(r.POS)
            except KeyError:
                # Position was not expected to fail: tally it per filter.
                for f in r.FILTER:
                    missing_bad[f] = missing_bad.get(f, 0) + 1

    self.assertEqual(missing_snps, {})

    # The message lists the leftover good positions, which is what this
    # assertion is about.
    self.assertLessEqual(len(self.good_vars), self.good_threshold,
                         ">=%.2f different good positions: [%s]" % (self.threshold, ",".join(str(i) for i in self.good_vars)))

    self.assertLessEqual(len(self.failed_vars),
                         self.failed_threshold,
                         ">=%.2f failed positions: [%s]" % (self.threshold, ",".join(str(i) for i in self.failed_vars)))