本文整理汇总了 Python 中 BCBio.GFF.GFFParser 类的典型用法代码示例。如果您正苦于以下问题：Python GFFParser 类的具体用法？Python GFFParser 怎么用？Python GFFParser 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 GFFParser 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: not_t_full_celegans
def not_t_full_celegans(self):
    """Test the full C elegans chromosome and GFF files.

    This is used to test GFF on large files and is not run as a standard
    test. You will need to download the files and adjust the paths
    to run this.

    The test only checks that parsing completes: every record produced by
    the parser is consumed and discarded.
    """
    # read the sequence information
    seq_file = os.path.join(self._full_dir, "c_elegans.WS199.dna.fa")
    gff_file = os.path.join(self._full_dir, "c_elegans.WS199.gff3")
    # The context manager closes the FASTA handle even if parsing fails.
    with open(seq_file) as seq_handle:
        seq_dict = SeqIO.to_dict(SeqIO.parse(seq_handle, "fasta"))
    # Debugging aid kept from the original author (left disabled):
    # with open(gff_file) as gff_handle:
    #     possible_limits = feature_adder.available_limits(gff_handle)
    #     pprint.pprint(possible_limits)
    rnai_types = [
        ('Orfeome', 'PCR_product'),
        ('GenePair_STS', 'PCR_product'),
        ('Promoterome', 'PCR_product'),
    ]
    gene_types = [
        ('Non_coding_transcript', 'gene'),
        ('Coding_transcript', 'gene'),
        ('Coding_transcript', 'mRNA'),
        ('Coding_transcript', 'CDS'),
    ]
    # Restrict parsing to the (source, type) pairs listed above.
    limit_info = dict(gff_source_type=rnai_types + gene_types)
    parser = GFFParser()
    for _ in parser.parse(gff_file, seq_dict, limit_info=limit_info):
        pass
示例2: gff_to_bed
def gff_to_bed(gff_file, bed_fh=sys.stdout, cds=True, species=None, rename=False):
    """Write one tab-separated BED-style line per selected GFF feature.

    Each output line holds: sequence id, start + 1, end, gene name.

    Args:
        gff_file: GFF input handed straight to ``GFFParser.parse``.
        bed_fh: Writable file-like object receiving the output lines.
        cds: Selects which features are written. A feature is "cds-like"
            when its type is ``gene`` and it has at least one ``mRNA`` or
            ``CDS`` sub-feature; only features whose cds-like status equals
            this flag are emitted.
        species: When not None, the output sequence id becomes
            ``species`` + the last two characters of the record id
            (hard-coded convention from the original author).
        rename: When true, genes are named
            ``<seqid>g<5-digit running number>``, with the counter restarting
            at 1 on every new record id; otherwise the feature's own id is
            used.
    """
    parser = GFFParser()
    cur_chr = None
    cur_gene_order = 0
    for seqid in parser.parse(gff_file, None):
        for feat in seqid.features:
            if feat.type in ("chromosome", "protein"):
                continue
            is_cds = feat.type == "gene" and any(
                sub.type in ("mRNA", "CDS") for sub in feat.sub_features
            )
            if cds != is_cds:
                continue

            cur_gene_order += 1
            if species is not None:
                seqid_final = species + seqid.id[-2:]  # hard coded
            else:
                seqid_final = seqid.id

            if rename:
                if seqid.id != cur_chr:
                    # First selected feature on a new record: restart numbering.
                    cur_gene_order = 1
                    cur_chr = seqid.id
                gene_name = seqid_final + "g" + str(cur_gene_order).zfill(5)
            else:
                gene_name = feat.id

            # +1 is hard coded to current BCBio.GFF (original author's note).
            fields = (
                seqid_final,
                int(str(feat.location.start)) + 1,
                feat.location.end,
                gene_name,
            )
            # file.write works on both Python 2 and 3, unlike ``print >> fh``.
            bed_fh.write("\t".join(str(x) for x in fields) + "\n")
示例3: t_unknown_seq
def t_unknown_seq(self):
    """Records parsed from the test GFF carry sequences of the expected length.
    """
    expected_lengths = (("I", 12766937), ("X", 17718531))
    records = SeqIO.to_dict(GFFParser().parse(self._test_gff_file))
    for seq_id, length in expected_lengths:
        assert len(records[seq_id].seq) == length
示例4: t_fasta_directive
def t_fasta_directive(self):
    """FASTA sequence data embedded in a GFF3 file ends up on the record.
    """
    records_by_id = SeqIO.to_dict(GFFParser().parse(self._gff_file))
    # The fixture is expected to hold exactly one record, named chr17.
    assert len(records_by_id) == 1
    assert str(records_by_id['chr17'].seq) == "GATTACAGATTACA"
示例5: t_ensembl_nested_features
def t_ensembl_nested_features(self):
    """GFF2 features sharing a transcript_id are nested under one parent.
    """
    gff = GFFParser()
    by_id = SeqIO.to_dict(gff.parse(self._ensembl_file))
    assert len(by_id["I"].features) == 2
    first_feature = by_id["I"].features[0]
    assert len(first_feature.sub_features) == 32
示例6: t_gff3_noval_attrib
def t_gff3_noval_attrib(self):
    """Parse GFF3 file from NCBI with a key/value pair with no value.

    The first feature of the single parsed record is expected to carry the
    qualifier ``pseudo`` with the value ``["true"]``.
    """
    parser = GFFParser()
    rec_dict = SeqIO.to_dict(parser.parse(self._test_ncbi))
    assert len(rec_dict) == 1
    # dict views are not subscriptable on Python 3; next(iter(...)) fetches
    # the only record on both Python 2 and 3.
    only_rec = next(iter(rec_dict.values()))
    t_feature = only_rec.features[0]
    assert t_feature.qualifiers["pseudo"] == ["true"]
示例7: t_gff_annotations
def t_gff_annotations(self):
    """Annotations that apply to a whole sequence are stored on the record.
    """
    records = SeqIO.to_dict(GFFParser().parse(self._test_gff_ann_file))
    annotations = records['I'].annotations
    assert len(annotations) == 2
    assert annotations['source'] == ['Expr_profile']
    assert annotations['expr_profile'] == ['B0019.1']
示例8: t_gff3_iterator
def t_gff3_iterator(self):
    """Parse a GFF3 file with nested features in chunks of about 70 lines.
    """
    chunked = GFFParser().parse_in_parts(self._test_gff_file, target_lines=70)
    recs = list(chunked)
    # Original author's note: should be one big set because we don't have a
    # good place to split.
    assert len(recs) == 6
    first_rec = recs[0]
    assert len(first_rec.features) == 59
示例9: t_solid_iterator
def t_solid_iterator(self):
    """Chunked parsing of a flat file (no nested features).

    Expects 112 records to be produced, none holding more than one feature.
    """
    gff = GFFParser()
    feature_sizes = [
        len(part.features)
        for part in gff.parse_in_parts(self._test_gff_file, target_lines=5)
    ]
    assert len(feature_sizes) == 112
    assert max(feature_sizes) == 1
示例10: t_gff2_iteration
def t_gff2_iteration(self):
    """Chunked parsing of a GFF2 file, breaking where features have no parents.
    """
    gff = GFFParser()
    recs = list(gff.parse_in_parts(self._wormbase_file, target_lines=15))
    assert len(recs) == 4
    leading = recs[0].features
    assert leading[0].type == 'region'
    assert leading[1].type == 'SAGE_tag'
    assert len(leading[2].sub_features) == 29
示例11: t_wb_cds_nested_features
def t_wb_cds_nested_features(self):
    """Nesting of GFF2 features with a flat CDS key value pair.

    Checks that the record at index 1 holds a single feature with id
    ``cr01.sctg102.wum.2.1`` and seven sub-features.
    """
    parser = GFFParser()
    rec_dict = SeqIO.to_dict(parser.parse(self._wb_alt_file))
    assert len(rec_dict) == 2
    # dict views cannot be indexed on Python 3, so materialise them first.
    # NOTE(review): index 1 relies on dict ordering (insertion order on
    # Python 3.7+, arbitrary on Python 2) -- confirm which record is meant.
    features = list(rec_dict.values())[1].features
    assert len(features) == 1
    tfeature = features[0]
    assert tfeature.id == "cr01.sctg102.wum.2.1"
    assert len(tfeature.sub_features) == 7
示例12: main
def main(in_file):
    """Convert a GFF file to GFF3, written next to the input.

    The output path is the input path with its extension replaced by
    ``.gff3``. Records are streamed through ``GFFParser.parse_in_parts``
    with ``target_lines=25000`` and written with ``GFF3Writer``.

    Args:
        in_file: Path of the GFF file to convert.

    Raises:
        ValueError: If the derived output path equals ``in_file``. Opening
            it for writing would truncate the input before it is read.
    """
    base = os.path.splitext(in_file)[0]
    out_file = "%s.gff3" % (base)
    if out_file == in_file:
        raise ValueError(
            "Output file %r would overwrite the input file" % (out_file,))
    reader = GFFParser()
    writer = GFF3Writer()
    # Context managers close both handles even if parsing or writing fails.
    with open(in_file) as in_handle, open(out_file, "w") as out_handle:
        writer.write(reader.parse_in_parts(in_handle, target_lines=25000),
                     out_handle)
示例13: t_gff3_multiple_ids
def t_gff3_multiple_ids(self):
    """Deal with GFF3 with non-unique ID attributes, using NCBI example.

    Every feature after the first one on the single parsed record is
    expected to have three sub-features.
    """
    parser = GFFParser()
    rec_dict = SeqIO.to_dict(parser.parse(self._test_ncbi))
    assert len(rec_dict) == 1
    # dict views are not subscriptable on Python 3; next(iter(...)) fetches
    # the only record on both Python 2 and 3.
    only_rec = next(iter(rec_dict.values()))
    t_features = only_rec.features[1:]
    # 4 feature sets, same ID, different positions, different attributes
    assert len(t_features) == 4
    for f in t_features:
        assert len(f.sub_features) == 3
示例14: extract_seq
def extract_seq(gff_file, outfile):
    """Write every record parsed from a GFF file to a FASTA file.

    Intended for GFF files that have their sequences attached. Each record
    yielded by ``GFFParser.parse`` is written to ``outfile`` with
    ``SeqIO.write`` in ``fasta`` format.

    Args:
        gff_file: Path of the GFF file to read.
        outfile: Path of the FASTA file to create (overwritten if present).
    """
    parser = GFFParser()
    # Context managers close both handles even if parsing or writing fails.
    with open(gff_file) as in_handle, open(outfile, "w") as fasta_file:
        for rec in parser.parse(in_handle):
            SeqIO.write(rec, fasta_file, "fasta")
示例15: t_no_dict_error
def t_no_dict_error(self):
    """Parsing without a sequence dictionary must raise KeyError.

    The parser is built with ``create_missing=False``; if iterating over
    the whole file finishes without a KeyError the test fails.
    """
    strict_parser = GFFParser(create_missing=False)
    try:
        for _ in strict_parser.parse(self._test_gff_file):
            pass
    except KeyError:
        # Expected outcome: the missing dictionary was reported.
        return
    # no error -- problem
    raise AssertionError('Did not complain with missing dictionary')