本文整理汇总了Python中cyvcf2.VCF类的典型用法代码示例。如果您正苦于以下问题:Python cyvcf2.VCF类的具体用法?Python cyvcf2.VCF怎么用?Python cyvcf2.VCF使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块cyvcf2的用法示例。
在下文中一共展示了cyvcf2.VCF类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_vep_csq_fields
# 需要导入模块: import cyvcf2 [as 别名]
# 或者: from cyvcf2 import VCF [as 别名]
def get_vep_csq_fields(cls: Type[V], vcf_raw_headers: List[str]) -> List[str]:
    """Extract the names of the CSQ fields that VEP wrote to the given VCF.

    Args:
        vcf_raw_headers: Raw header lines of the VCF, in file order.

    Returns:
        The CSQ sub-field names, in the order listed after ``Format:`` in
        the CSQ INFO header.

    Raises:
        ValueError: If no ``##INFO=<ID=CSQ,`` header line exists, or if its
            ``Format:`` specification cannot be parsed.
    """
    # Scan from the end: when the header is declared more than once, the
    # newer declaration appears later in the file.
    csq_info_header = next(
        (
            line
            for line in reversed(vcf_raw_headers)
            if line.startswith("##INFO=<ID=CSQ,")
        ),
        None,
    )
    if csq_info_header is None:
        raise ValueError("Cannot find CSQ format in the VCF header")

    # Capture everything up to the closing quote rather than only
    # word characters, so field names containing '+', '-' or '.'
    # (e.g. "GERP++_RS") do not make the whole header unparseable.
    m = re.search(r"Format: ([^'\"]+)['\"]", csq_info_header)
    if m is None:
        raise ValueError(
            f"Cannot parse the CSQ field format from its INFO VCF header: {csq_info_header}"
        )
    return m.group(1).split("|")
示例2: read_vcf
# 需要导入模块: import cyvcf2 [as 别名]
# 或者: from cyvcf2 import VCF [as 别名]
def read_vcf(cls: Type[V], path: Path) -> Generator[V, None, None]:
    """
    Iterate over the records of the VCF at `path`.

    The file is opened with :class:`cyvcf2.VCF <cyvcf2.cyvcf2.VCF>`; every
    record is converted with :meth:`from_cyvcf2` and yielded. The reader is
    wrapped in ``closing`` so it is closed when iteration ends.

    See also :meth:`read_and_parse_vcf` to read and parse the VCF.

    Args:
        path: Path to the VCF.

    Yields:
        One object built by ``cls.from_cyvcf2`` per record.
    """
    reader = VCF(str(path))
    with closing(reader) as records:
        for record in records:
            yield cls.from_cyvcf2(record)
示例3: main
# 需要导入模块: import cyvcf2 [as 别名]
# 或者: from cyvcf2 import VCF [as 别名]
def main(vcf):
    """Plot a heatmap of how many variant IDs each pair of samples shares.

    For every sample, the IDs of the records whose genotype call satisfies
    ``is_variant`` are collected. The sizes of the pairwise intersections
    are drawn as an annotated heatmap and saved to ``SV-calls_heatmap.png``.

    Args:
        vcf: Passed straight to ``VCF`` (presumably a path to a VCF file).
    """
    reader = VCF(vcf)
    sample_names = reader.samples
    ids_by_sample = {name: set() for name in sample_names}
    for record in reader:
        for name, genotype in zip(sample_names, record.gt_types):
            if not is_variant(genotype):
                continue
            ids_by_sample[name].add(record.ID)

    # Square sample-by-sample table of shared-ID counts.
    overlap = pd.DataFrame(index=sample_names, columns=sample_names)
    for row_name, row_ids in ids_by_sample.items():
        for col_name, col_ids in ids_by_sample.items():
            overlap.loc[row_name, col_name] = len(row_ids & col_ids)

    # Tick labels: the second '/'-separated component, cut at its first '.'.
    # If any sample name has no '/', fall back to the raw names for all.
    try:
        labels = [name.split('/')[1].split('.')[0] for name in sample_names]
    except IndexError:
        labels = sample_names

    sns.heatmap(
        data=overlap,
        annot=True,
        fmt="d",
        linewidths=0.5,
        xticklabels=labels,
        yticklabels=labels,
    )
    plt.savefig("SV-calls_heatmap.png", bbox_inches="tight")
示例4: get_vep_scores
# 需要导入模块: import cyvcf2 [as 别名]
# 或者: from cyvcf2 import VCF [as 别名]
def get_vep_scores(
    vcf_name,
    vep_vcf_key="CSQ",
    sel_vep_keys=(
        "phyloP46way_placental",
        "phyloP46way_primate",
        "CADD_PHRED",
        "CADD_RAW",
    ),
):
    """Collect selected numeric VEP annotation fields for every variant in a VCF.

    Args:
        vcf_name: Passed to ``cyvcf2.VCF`` to open the file.
        vep_vcf_key: ID of the INFO field holding the VEP annotation.
        sel_vep_keys: Names of the annotation sub-fields to extract.

    Returns:
        A float ``pandas.DataFrame`` with one row per variant, indexed by
        ``CHROM:POS:REF:ALT`` (first ALT allele only), and one column per
        entry of ``sel_vep_keys``. Empty annotation values become NaN. When
        the same variant id occurs more than once, only its first row is kept.

    Raises:
        ValueError: If the header has no entry with ID ``vep_vcf_key``, or
            if one of ``sel_vep_keys`` is not among the annotation sub-fields.
    """
    # Work on a private list so the (immutable) default is never shared.
    sel_vep_keys = list(sel_vep_keys)
    vcf_fh = cyvcf2.VCF(vcf_name)
    try:
        # The sub-field names are the '|'-separated list after the last ": "
        # in the header's Description.
        vep_keys = None
        for hdr in vcf_fh.header_iter():
            hdr_info = hdr.info()
            if hdr_info.get('ID') == vep_vcf_key:
                vep_keys = hdr_info['Description'].split(": ")[-1].rstrip('"').split("|")
                break
        if vep_keys is None:
            raise ValueError(
                f"No header entry with ID {vep_vcf_key!r} found in {vcf_name!r}"
            )
        sel_vep_elms = [vep_keys.index(k) for k in sel_vep_keys]

        entries = []
        for rec in vcf_fh:
            info_dict = dict(rec.INFO)
            if vep_vcf_key not in info_dict:
                continue
            # Only the first comma-separated annotation block is used.
            vep_entries = info_dict[vep_vcf_key].split(",")[0].split("|")
            variant_uid = ":".join([rec.CHROM, str(rec.POS), rec.REF, rec.ALT[0]])
            entries.append(
                pd.Series(
                    [vep_entries[i] for i in sel_vep_elms],
                    name=variant_uid,
                    index=sel_vep_keys,
                )
            )
    finally:
        vcf_fh.close()

    # Turn into a data frame; empty strings are parsed as NaN.
    df = pd.DataFrame(entries)
    df = df.replace("", "nan").astype(float)
    # Keep only the first row for each variant id.
    return df.loc[~df.index.duplicated(), :]
示例5: get_vep_scores
# 需要导入模块: import cyvcf2 [as 别名]
# 或者: from cyvcf2 import VCF [as 别名]
def get_vep_scores(vcf_name,
                   vep_vcf_key="CSQ",
                   sel_vep_keys=("phyloP46way_placental",
                                 "phyloP46way_primate",
                                 "CADD_phred",
                                 "CADD_raw")):
    """Collect selected numeric VEP annotation fields for every variant in a VCF.

    Args:
        vcf_name: Passed to ``cyvcf2.VCF`` to open the file.
        vep_vcf_key: ID of the INFO field holding the VEP annotation.
        sel_vep_keys: Names of the annotation sub-fields to extract.

    Returns:
        A float ``pandas.DataFrame`` with one row per variant, indexed by
        ``CHROM:POS:REF:ALT`` (first ALT allele only), and one column per
        entry of ``sel_vep_keys``. Empty annotation values become NaN. When
        the same variant id occurs more than once, only its first row is kept.

    Raises:
        ValueError: If the header has no entry with ID ``vep_vcf_key``, or
            if one of ``sel_vep_keys`` is not among the annotation sub-fields.
    """
    # Work on a private list so the (immutable) default is never shared.
    sel_vep_keys = list(sel_vep_keys)
    vcf_fh = cyvcf2.VCF(vcf_name)
    try:
        # The sub-field names are the '|'-separated list after the last ": "
        # in the header's Description.
        vep_keys = None
        for hdr in vcf_fh.header_iter():
            hdr_info = hdr.info()
            if hdr_info.get('ID') == vep_vcf_key:
                vep_keys = hdr_info['Description'].split(": ")[-1].rstrip('"').split("|")
                break
        if vep_keys is None:
            raise ValueError(
                f"No header entry with ID {vep_vcf_key!r} found in {vcf_name!r}"
            )
        sel_vep_elms = [vep_keys.index(k) for k in sel_vep_keys]

        entries = []
        for rec in vcf_fh:
            info_dict = dict(rec.INFO)
            if vep_vcf_key not in info_dict:
                continue
            # Only the first comma-separated annotation block is used.
            vep_entries = info_dict[vep_vcf_key].split(",")[0].split("|")
            variant_uid = ":".join([rec.CHROM, str(rec.POS), rec.REF, rec.ALT[0]])
            entries.append(
                pd.Series(
                    [vep_entries[i] for i in sel_vep_elms],
                    name=variant_uid,
                    index=sel_vep_keys,
                )
            )
    finally:
        vcf_fh.close()

    # Turn into a data frame; empty strings are parsed as NaN.
    df = pd.DataFrame(entries)
    df = df.replace("", "nan").astype(float)
    # Keep only the first row for each variant id.
    return df.loc[~df.index.duplicated(), :]
示例6: test_hemi
# 需要导入模块: import cyvcf2 [as 别名]
# 或者: from cyvcf2 import VCF [as 别名]
def test_hemi():
    """
    Check the gt_types reported for hemizygous variants.

    Every record of the VCFs at ``HEM_PATH`` and ``VCF_PATH`` is handed
    to ``check_var``.
    """
    for vcf_path in (HEM_PATH, VCF_PATH):
        for variant in VCF(vcf_path):
            check_var(variant)
示例7: from_cyvcf2
# 需要导入模块: import cyvcf2 [as 别名]
# 或者: from cyvcf2 import VCF [as 别名]
def from_cyvcf2(cls: Type[V], variant: CyVCF2Variant) -> V:
    """
    Build one object of ``cls`` from a
    :class:`cyvcf2.Variant <cyvcf2.cyvcf2.Variant>` VCF record.

    Only the first ALT allele is used, and the INFO field is copied into a
    plain ``dict``.
    """
    fields = {
        "chrom": variant.CHROM,
        # NOTE(review): presumably shifts a 0-based start to 1-based; confirm.
        "start_pos": variant.start + 1,
        "end_pos": variant.end,
        "ref_allele": variant.REF,
        "alt_allele": variant.ALT[0],
        "id": variant.ID,
        "filter": variant.FILTER,
        "info": dict(variant.INFO),
    }
    return cls(**fields)
示例8: get_vep_version
# 需要导入模块: import cyvcf2 [as 别名]
# 或者: from cyvcf2 import VCF [as 别名]
def get_vep_version(cls: Type[V], vcf_raw_headers: List[str]) -> str:
    """Return the VEP major version recorded in the given VCF headers.

    The last ``##VEP=`` header line is used, because a newer header appears
    later in the file. When no such line exists, or its version cannot be
    parsed, a warning is logged and ``"UNKNOWN"`` is returned.
    """
    version = "UNKNOWN"
    try:
        newest_vep_line = next(
            line for line in reversed(vcf_raw_headers) if line.startswith("##VEP=")
        )
        # re.match returns None on a malformed line; the resulting
        # AttributeError is handled below.
        version = re.match(r"^##VEP=['\"]?v(\d+)['\"]?", newest_vep_line).group(1)  # type: ignore
    except (StopIteration, AttributeError):
        logger.warning(f"Cannot find VEP version in the VCF header")
    return version
示例9: __init__
# 需要导入模块: import cyvcf2 [as 别名]
# 或者: from cyvcf2 import VCF [as 别名]
def __init__(self, *args, **kwargs):
    """Initialise the parent class with ``strict_gt=True`` and index the samples.

    All positional and keyword arguments are forwarded to the parent
    ``__init__``. ``strict_gt=True`` is always added, so passing
    ``strict_gt`` explicitly raises ``TypeError`` (duplicate keyword).

    Afterwards ``self.sample_mapping`` maps each sample name to its
    position in ``self.samples``.
    """
    from cyvcf2 import VCF
    super(MultiSampleVCF, self).__init__(*args, **kwargs, strict_gt=True)
    self.sample_mapping = {name: idx for idx, name in enumerate(self.samples)}
示例10: one_variant
# 需要导入模块: import cyvcf2 [as 别名]
# 或者: from cyvcf2 import VCF [as 别名]
def one_variant(request, variant_clinical_file):
    """Return the first record read from ``variant_clinical_file``.

    ``request`` is accepted but not used here (presumably the pytest
    fixture request object; confirm against the decorator).
    """
    LOG.info("Return one parsed variant")
    return next(VCF(variant_clinical_file))
示例11: one_vep97_annotated_variant
# 需要导入模块: import cyvcf2 [as 别名]
# 或者: from cyvcf2 import VCF [as 别名]
def one_vep97_annotated_variant(request, vep_97_annotated_variant_clinical_file):
    """Return the first record read from ``vep_97_annotated_variant_clinical_file``.

    ``request`` is accepted but not used here (presumably the pytest
    fixture request object; confirm against the decorator).
    """
    LOG.info("Return one parsed variant")
    return next(VCF(vep_97_annotated_variant_clinical_file))
示例12: one_cancer_manta_SV_variant
# 需要导入模块: import cyvcf2 [as 别名]
# 或者: from cyvcf2 import VCF [as 别名]
def one_cancer_manta_SV_variant(request, vep_94_manta_annotated_SV_variants_file):
    """Return the first record read from ``vep_94_manta_annotated_SV_variants_file``.

    ``request`` is accepted but not used here (presumably the pytest
    fixture request object; confirm against the decorator).
    """
    LOG.info("Return one parsed cancer SV variant")
    return next(VCF(vep_94_manta_annotated_SV_variants_file))
示例13: one_variant_customannotation
# 需要导入模块: import cyvcf2 [as 别名]
# 或者: from cyvcf2 import VCF [as 别名]
def one_variant_customannotation(request, customannotation_snv_file):
    """Return the first record read from ``customannotation_snv_file``.

    ``request`` is accepted but not used here (presumably the pytest
    fixture request object; confirm against the decorator).
    """
    LOG.info("Return one parsed variant with custom annotations")
    return next(VCF(customannotation_snv_file))
示例14: one_sv_variant
# 需要导入模块: import cyvcf2 [as 别名]
# 或者: from cyvcf2 import VCF [as 别名]
def one_sv_variant(request, sv_clinical_file):
    """Return the first record read from ``sv_clinical_file``.

    ``request`` is accepted but not used here (presumably the pytest
    fixture request object; confirm against the decorator).
    """
    LOG.info("Return one parsed SV variant")
    return next(VCF(sv_clinical_file))
示例15: rank_results_header
# 需要导入模块: import cyvcf2 [as 别名]
# 或者: from cyvcf2 import VCF [as 别名]
def rank_results_header(request, variant_clinical_file):
    """Return ``parse_rank_results_header`` applied to the opened VCF.

    ``request`` is accepted but not used here (presumably the pytest
    fixture request object; confirm against the decorator).

    NOTE(review): the log message speaks of returning a VCF parser, but the
    value returned is whatever ``parse_rank_results_header`` produces.
    """
    LOG.info("Return a VCF parser with one variant")
    return parse_rank_results_header(VCF(variant_clinical_file))