本文整理汇总了 Python 中 BCBio.GFF.GFFParser.parse 方法的典型用法代码示例。如果您正苦于以下问题:Python GFFParser.parse 方法的具体用法?Python GFFParser.parse 怎么用?Python GFFParser.parse 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 BCBio.GFF.GFFParser 的用法示例。
在下文中一共展示了GFFParser.parse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: not_t_full_celegans
# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def not_t_full_celegans(self):
    """Test the full C elegans chromosome and GFF files.

    This is used to test GFF on large files and is not run as a standard
    test. You will need to download the files and adjust the paths
    to run this.

    The FASTA file is loaded into a dictionary of records, then the GFF3
    file is parsed against it, limited to the (source, type) pairs listed
    below. Records are consumed and discarded; the only check is that
    parsing completes without raising.
    """
    seq_file = os.path.join(self._full_dir, "c_elegans.WS199.dna.fa")
    gff_file = os.path.join(self._full_dir, "c_elegans.WS199.gff3")

    # Read the sequence information. The context manager guarantees the
    # handle is closed even if FASTA parsing raises part-way through.
    with open(seq_file) as seq_handle:
        seq_dict = SeqIO.to_dict(SeqIO.parse(seq_handle, "fasta"))

    rnai_types = [
        ('Orfeome', 'PCR_product'),
        ('GenePair_STS', 'PCR_product'),
        ('Promoterome', 'PCR_product'),
    ]
    gene_types = [
        ('Non_coding_transcript', 'gene'),
        ('Coding_transcript', 'gene'),
        ('Coding_transcript', 'mRNA'),
        ('Coding_transcript', 'CDS'),
    ]
    limit_info = dict(gff_source_type=rnai_types + gene_types)

    parser = GFFParser()
    # Exhaust the parser; the records themselves are not inspected.
    for _rec in parser.parse(gff_file, seq_dict, limit_info=limit_info):
        pass
示例2: t_unknown_seq
# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def t_unknown_seq(self):
    """Check that unknown base sequences are prepared with the correct length.

    The GFF test file is parsed without a sequence dictionary, and the
    records for "I" and "X" must report the sequence lengths listed below.
    """
    records = SeqIO.to_dict(GFFParser().parse(self._test_gff_file))
    expected_lengths = (("I", 12766937), ("X", 17718531))
    for rec_id, expected in expected_lengths:
        assert len(records[rec_id].seq) == expected
示例3: t_gff3_to_gff3
# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def t_gff3_to_gff3(self):
    """Read in and write out GFF3 without any loss of information.

    The test file is parsed, written to an in-memory buffer with
    ``GFF3Writer``, and parsed again from that buffer. One record from the
    first parse is then compared, feature by feature, with the record of the
    same id from the second parse.
    """
    parser = GFFParser()
    recs = SeqIO.to_dict(parser.parse(self._test_gff_file))

    out_handle = StringIO.StringIO()
    writer = GFF3Writer()
    writer.write(recs.values(), out_handle)

    wrote_handle = StringIO.StringIO(out_handle.getvalue())
    recs_two = SeqIO.to_dict(parser.parse(wrote_handle))

    # list() keeps the indexing valid when dict.values() returns a view.
    orig_rec = list(recs.values())[0]
    # Look the record up in the *re-parsed* dictionary by id, so the
    # comparison really is original vs. round-tripped rather than a record
    # against itself.
    re_rec = recs_two[orig_rec.id]

    assert len(orig_rec.features) == len(re_rec.features)
    for orig_f, re_f in zip(orig_rec.features, re_rec.features):
        assert str(orig_f) == str(re_f)
示例4: gff_to_bed
# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def gff_to_bed(gff_file, bed_fh=sys.stdout, cds=True, species=None, rename=False):
    """Write one tab-separated BED-style line per selected GFF feature.

    Each output line has four columns: sequence name, start (plus one),
    end, and gene name.

    Args:
        gff_file: GFF input, passed straight to ``GFFParser.parse``.
        bed_fh: Writable file-like object that receives the output lines.
        cds: If True, only "gene" features with at least one "mRNA" or "CDS"
            sub-feature are written; if False, all other features are
            written instead. "chromosome" and "protein" features are always
            skipped.
        species: Optional prefix. When given, the sequence name is this
            prefix followed by the last two characters of the record id.
        rename: If True, gene names are generated as
            ``<sequence name>g<5-digit order>``, with the counter restarting
            at 1 whenever the record id changes; otherwise the feature id
            is used.
    """
    parser = GFFParser()
    seqids = parser.parse(gff_file, None)

    cur_chr = None
    cur_gene_order = 0
    for seqid in seqids:
        for feat in seqid.features:
            subf = feat.sub_features
            if feat.type in ("chromosome", "protein"):
                continue

            is_cds = (
                any(f.type == "mRNA" or f.type == "CDS" for f in subf)
                and feat.type == "gene"
            )
            if cds != is_cds:
                continue

            cur_gene_order += 1
            if species is not None:
                seqid_final = species + seqid.id[-2:]  # hard coded
            else:
                seqid_final = seqid.id

            if rename:
                if seqid.id != cur_chr:
                    # First selected feature on a new record: restart numbering.
                    cur_gene_order = 1
                    cur_chr = seqid.id
                gene_name = seqid_final + "g" + "%05d" % cur_gene_order
            else:
                gene_name = feat.id

            # +1 is hard coded to current BCBio.GFF
            fields = (
                seqid_final,
                int(str(feat.location.start)) + 1,
                feat.location.end,
                gene_name,
            )
            # Explicit write() instead of the Python 2-only ``print >>`` form.
            bed_fh.write("\t".join(str(x) for x in fields) + "\n")
示例5: t_ensembl_nested_features
# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def t_ensembl_nested_features(self):
    """Check nesting of features in GFF2 files that use transcript_id.

    Record "I" of the Ensembl test file must have two top-level features,
    the first of which carries 32 sub-features.
    """
    records = SeqIO.to_dict(GFFParser().parse(self._ensembl_file))
    top_level = records["I"].features
    assert len(top_level) == 2
    assert len(top_level[0].sub_features) == 32
示例6: t_gff3_noval_attrib
# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def t_gff3_noval_attrib(self):
    """Parse GFF3 file from NCBI with a key/value pair with no value.

    The first feature of the single parsed record must expose the
    "pseudo" qualifier as ``["true"]``.
    """
    parser = GFFParser()
    rec_dict = SeqIO.to_dict(parser.parse(self._test_ncbi))
    assert len(rec_dict) == 1
    # Exactly one record (asserted above), so taking the first value is
    # unambiguous; list() keeps this valid when values() returns a view.
    t_feature = list(rec_dict.values())[0].features[0]
    assert t_feature.qualifiers["pseudo"] == ["true"]
示例7: t_fasta_directive
# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def t_fasta_directive(self):
    """Check that FASTA sequence data embedded in a GFF3 file is parsed.

    The file must yield a single record, 'chr17', whose sequence matches
    the string asserted below.
    """
    records = SeqIO.to_dict(GFFParser().parse(self._gff_file))
    assert len(records) == 1
    chr17 = records['chr17']
    assert str(chr17.seq) == "GATTACAGATTACA"
示例8: t_gff_annotations
# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def t_gff_annotations(self):
    """Check GFF annotations that apply to an entire sequence.

    Record 'I' must carry exactly two annotation keys with the values
    listed below.
    """
    records = SeqIO.to_dict(GFFParser().parse(self._test_gff_ann_file))
    annotations = records['I'].annotations
    assert len(annotations) == 2
    expected = (
        ('source', ['Expr_profile']),
        ('expr_profile', ['B0019.1']),
    )
    for key, value in expected:
        assert annotations[key] == value
示例9: t_no_dict_error
# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def t_no_dict_error(self):
    """Ensure a KeyError is raised when there is no dictionary to map to.

    The parser is built with ``create_missing=False`` and given no sequence
    dictionary; consuming its output is expected to raise ``KeyError``.
    """
    strict_parser = GFFParser(create_missing=False)
    try:
        for _rec in strict_parser.parse(self._test_gff_file):
            pass
    except KeyError:
        # Expected outcome: the missing dictionary was reported.
        return
    # Parsing finished without complaint, which is the failure case.
    raise AssertionError('Did not complain with missing dictionary')
示例10: get_feature_cord
# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def get_feature_cord(gff_file, user_feature="gene"):
    """Collect feature coordinates from a GFF file, keyed by gene id.

    Returns a dictionary mapping each gene id to a list of tuples. The
    tuple layout depends on ``user_feature``:

    user_feature = "gene": (start, end, strand, record id)
        a_dict['FBgn0031208'] = [(7528, 9484, 1, '2L')]

    user_feature = "mRNA": one (start, end, strand, mRNA id, record id)
    tuple per mRNA of the gene
        a_dict['FBgn0031208'] = [(7528, 9484, 1, 'FBtr0300689', '2L'),
                                 (7528, 9484, 1, 'FBtr0300690', '2L'),
                                 (7528, 9484, 1, 'FBtr0330654', '2L')]

    any other value (e.g. "CDS"): one ([(start, end), ...], strand, mRNA id,
    record id) tuple per mRNA, listing the CDS intervals under that mRNA
        a_dict['FBgn0031208'] = [([(7679, 8116), (8192, 8610)], 1, 'FBtr0300689', '2L'),
                                 ([(7679, 8116), (8192, 8589), (8667, 9276)], 1, 'FBtr0300690', '2L'),
                                 ([(7679, 8116), (8228, 8610)], 1, 'FBtr0330654', '2L')]

    Args:
        gff_file: Path of the GFF file to read.
        user_feature: Feature level to report, see above.

    Raises:
        AssertionError: if ``user_feature`` is "gene" and the same gene id
            is seen twice.
    """
    limit_info = dict(gff_type=['protein', 'gene', 'mRNA', 'CDS', 'exon'])
    feature_dict = {}
    parser = GFFParser()
    # The context manager closes the file even if parsing or the duplicate
    # gene assertion raises.
    with open(gff_file) as in_handle:
        for rec in parser.parse(in_handle, limit_info=limit_info):
            rec_id = rec.id
            for feat in rec.features:
                # Only top-level gene features are reported.
                if feat.type != "gene":
                    continue
                gene_id = feat.id

                if user_feature == "gene":
                    assert gene_id not in feature_dict
                    feature_dict[gene_id] = [(feat.location.start.position,
                                              feat.location.end.position,
                                              feat.strand, rec_id)]
                    continue

                for sub in feat.sub_features:
                    if sub.type != "mRNA":
                        continue
                    if user_feature == "mRNA":
                        info = (sub.location.start.position,
                                sub.location.end.position,
                                sub.strand, sub.id, rec_id)
                    else:
                        # CDS intervals nested under this mRNA, in file order.
                        codons = [(sub_sub.location.start.position,
                                   sub_sub.location.end.position)
                                  for sub_sub in sub.sub_features
                                  if sub_sub.type == "CDS"]
                        info = (codons, sub.strand, sub.id, rec_id)
                    feature_dict.setdefault(gene_id, []).append(info)
    return feature_dict
示例11: extract_seq
# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def extract_seq(gff_file, outfile):
    """Write every record parsed from a GFF file to a FASTA file.

    Intended for GFF files that have sequence data attached.

    Args:
        gff_file: Path of the GFF file to read.
        outfile: Path of the FASTA file to create (opened in "w" mode, so
            an existing file is overwritten).
    """
    parser = GFFParser()
    # Both handles are closed on exit, even if parsing or writing raises.
    with open(gff_file) as in_handle, open(outfile, "w") as fasta_file:
        for rec in parser.parse(in_handle):
            SeqIO.write(rec, fasta_file, "fasta")
示例12: t_gff3_multiple_ids
# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def t_gff3_multiple_ids(self):
    """Deal with GFF3 with non-unique ID attributes, using NCBI example.

    After skipping the first feature of the single parsed record, four
    features must remain, each with three sub-features.
    """
    parser = GFFParser()
    rec_dict = SeqIO.to_dict(parser.parse(self._test_ncbi))
    assert len(rec_dict) == 1
    # Exactly one record (asserted above); list() keeps the indexing valid
    # when values() returns a view.
    t_features = list(rec_dict.values())[0].features[1:]
    # 4 feature sets, same ID, different positions, different attributes
    assert len(t_features) == 4
    for f in t_features:
        assert len(f.sub_features) == 3
示例13: t_wb_cds_nested_features
# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def t_wb_cds_nested_features(self):
    """Nesting of GFF2 features with a flat CDS key value pair.

    The file must parse into two records; the record at index 1 must hold
    a single feature with the id asserted below and seven sub-features.
    """
    parser = GFFParser()
    rec_dict = SeqIO.to_dict(parser.parse(self._wb_alt_file))
    assert len(rec_dict) == 2
    # list() keeps the indexing valid when values() returns a view.
    # NOTE(review): picking index 1 depends on the dictionary's iteration
    # order, so which record is checked may differ between interpreters;
    # confirm against the test data.
    features = list(rec_dict.values())[1].features
    assert len(features) == 1
    tfeature = features[0]
    assert tfeature.id == "cr01.sctg102.wum.2.1"
    assert len(tfeature.sub_features) == 7
示例14: t_local_map_reduce
# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def t_local_map_reduce(self):
    """Run the general map reduce framework without parallelization.

    Parsing is limited to gene, mRNA and CDS features on record 'I', which
    must then contain 32 top-level features.
    """
    cds_limit_info = {
        "gff_type": ["gene", "mRNA", "CDS"],
        "gff_id": ['I'],
    }
    parsed = GFFParser().parse(self._test_gff_file, limit_info=cds_limit_info)
    records = SeqIO.to_dict(parsed)
    assert len(records['I'].features) == 32
示例15: t_jgi_gff
# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def t_jgi_gff(self):
    """Check parsing of JGI formatted GFF2, nested using transcriptId and proteinID.

    The first feature on 'chr_1' must be an 'inferred_parent' with the
    location and six sub-features asserted below; its second sub-feature
    must carry the expected 'proteinId' and 'phase' qualifiers.
    """
    records = SeqIO.to_dict(GFFParser().parse(self._jgi_file))
    parent = records['chr_1'].features[0]
    assert parent.location.nofuzzy_start == 37060
    assert parent.location.nofuzzy_end == 38216
    assert parent.type == 'inferred_parent'
    assert len(parent.sub_features) == 6

    child = parent.sub_features[1]
    for key, value in (('proteinId', ['873']), ('phase', ['0'])):
        assert child.qualifiers[key] == value