当前位置: 首页>>代码示例>>Python>>正文


Python GFFParser.parse方法代码示例

本文整理汇总了Python中BCBio.GFF.GFFParser.parse方法的典型用法代码示例。如果您正苦于以下问题:Python GFFParser.parse方法的具体用法?Python GFFParser.parse怎么用?Python GFFParser.parse使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在BCBio.GFF.GFFParser的用法示例。


在下文中一共展示了GFFParser.parse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: not_t_full_celegans

# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
    def not_t_full_celegans(self):
        """Parse the full C. elegans chromosome and GFF files.

        This exercises GFF parsing on large files and is not run as a
        standard test. You will need to download the files and adjust the
        paths (``self._full_dir``) to run this.
        """
        seq_file = os.path.join(self._full_dir, "c_elegans.WS199.dna.fa")
        gff_file = os.path.join(self._full_dir, "c_elegans.WS199.gff3")
        # Read the sequence information; the context manager closes the
        # handle even when FASTA parsing raises.
        with open(seq_file) as seq_handle:
            seq_dict = SeqIO.to_dict(SeqIO.parse(seq_handle, "fasta"))
        # (source, type) pairs the parser is limited to.
        rnai_types = [('Orfeome', 'PCR_product'),
                      ('GenePair_STS', 'PCR_product'),
                      ('Promoterome', 'PCR_product')]
        gene_types = [('Non_coding_transcript', 'gene'),
                      ('Coding_transcript', 'gene'),
                      ('Coding_transcript', 'mRNA'),
                      ('Coding_transcript', 'CDS')]
        limit_info = dict(gff_source_type=rnai_types + gene_types)
        parser = GFFParser()
        # Only drain the iterator: the check is that parsing the whole file
        # completes without raising.
        for _ in parser.parse(gff_file, seq_dict, limit_info=limit_info):
            pass
开发者ID:jamescasbon,项目名称:bcbb,代码行数:29,代码来源:test_GFFSeqIOFeatureAdder.py

示例2: t_unknown_seq

# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
 def t_unknown_seq(self):
     """Records parsed without sequence data carry sequences of the
     expected length.
     """
     expected_lengths = (("I", 12766937), ("X", 17718531))
     records = SeqIO.to_dict(GFFParser().parse(self._test_gff_file))
     # Checked in a fixed order: record "I" first, then "X".
     for rec_id, expected in expected_lengths:
         assert len(records[rec_id].seq) == expected
开发者ID:jamescasbon,项目名称:bcbb,代码行数:9,代码来源:test_GFFSeqIOFeatureAdder.py

示例3: t_gff3_to_gff3

# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
    def t_gff3_to_gff3(self):
        """Read in and write out GFF3 without any loss of information.

        Parses the test GFF file, writes the records through GFF3Writer to
        an in-memory buffer, parses that buffer again, and checks that the
        features of the re-parsed record match the original ones.
        """
        parser = GFFParser()
        recs = SeqIO.to_dict(parser.parse(self._test_gff_file))
        out_handle = StringIO.StringIO()
        writer = GFF3Writer()
        writer.write(recs.values(), out_handle)
        wrote_handle = StringIO.StringIO(out_handle.getvalue())
        recs_two = SeqIO.to_dict(parser.parse(wrote_handle))

        # list(...) keeps the indexing valid where values() is a view.
        orig_rec = list(recs.values())[0]
        # Compare against the record that actually went through the
        # write/re-parse cycle; look it up by id so the result does not
        # depend on dictionary ordering.
        re_rec = recs_two[orig_rec.id]
        assert len(orig_rec.features) == len(re_rec.features)
        for orig_f, re_f in zip(orig_rec.features, re_rec.features):
            assert str(orig_f) == str(re_f)
开发者ID:jamescasbon,项目名称:bcbb,代码行数:18,代码来源:test_GFFSeqIOFeatureAdder.py

示例4: gff_to_bed

# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def gff_to_bed(gff_file, bed_fh=sys.stdout, cds=True, species=None, rename=False):
    """Write one tab-separated line per selected top-level GFF feature.

    Each line holds four columns: sequence id, start + 1, end, gene name.
    Features of type "chromosome" or "protein" are always skipped.

    Args:
        gff_file: GFF input handed straight to ``GFFParser.parse``.
        bed_fh: writable file-like object that receives the output lines.
        cds: when true, only "gene" features having at least one "mRNA" or
            "CDS" sub-feature are written; when false, only the remaining
            features are written.
        species: optional prefix. When given, the output sequence id is
            this prefix followed by the last two characters of the record
            id; otherwise the record id is used unchanged.
        rename: when true, the gene name is generated as
            ``<sequence id>g<NNNNN>`` with a zero-padded counter that
            restarts at 1 for every record; otherwise the feature id is used.
    """
    parser = GFFParser()
    seqids = parser.parse(gff_file, None)

    cur_chr = None
    cur_gene_order = 0
    for seqid in seqids:
        for feat in seqid.features:
            if feat.type in ("chromosome", "protein"):
                continue
            is_cds = feat.type == "gene" and any(
                f.type in ("mRNA", "CDS") for f in feat.sub_features
            )
            if cds != is_cds:
                continue

            cur_gene_order += 1
            if species is not None:
                seqid_final = species + seqid.id[-2:]  # hard coded
            else:
                seqid_final = seqid.id

            if rename:
                # Restart the numbering on the first selected feature of
                # each new record.
                if seqid.id != cur_chr:
                    cur_gene_order = 1
                    cur_chr = seqid.id
                gene_name = seqid_final + "g" + str(cur_gene_order).zfill(5)
            else:
                gene_name = feat.id

            # +1 is hard coded to current BCBio.GFF
            fields = (
                seqid_final,
                int(str(feat.location.start)) + 1,
                feat.location.end,
                gene_name,
            )
            # write() instead of the Python 2-only ``print >> fh`` statement.
            bed_fh.write("\t".join(str(x) for x in fields) + "\n")
开发者ID:Jingping,项目名称:BiteTools,代码行数:32,代码来源:gff_to_bed.py

示例5: t_ensembl_nested_features

# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
 def t_ensembl_nested_features(self):
     """GFF2 features sharing a transcript_id are nested under one parent.
     """
     records = SeqIO.to_dict(GFFParser().parse(self._ensembl_file))
     chrom_features = records["I"].features
     assert len(chrom_features) == 2
     # The first top-level feature is expected to hold 32 sub-features.
     first_parent = chrom_features[0]
     assert len(first_parent.sub_features) == 32
开发者ID:jamescasbon,项目名称:bcbb,代码行数:10,代码来源:test_GFFSeqIOFeatureAdder.py

示例6: t_gff3_noval_attrib

# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
 def t_gff3_noval_attrib(self):
     """Parse GFF3 file from NCBI with a key/value pair with no value.

     The first feature of the single parsed record is expected to carry
     the qualifier ``pseudo`` with the value ``["true"]``.
     """
     parser = GFFParser()
     rec_dict = SeqIO.to_dict(parser.parse(self._test_ncbi))
     assert len(rec_dict) == 1
     # list(...) keeps the indexing valid where values() is a view.
     only_rec = list(rec_dict.values())[0]
     t_feature = only_rec.features[0]
     assert t_feature.qualifiers["pseudo"] == ["true"]
开发者ID:JCVI-Cloud,项目名称:VICVB,代码行数:10,代码来源:test_GFFSeqIOFeatureAdder.py

示例7: t_fasta_directive

# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
 def t_fasta_directive(self):
     """FASTA sequence data embedded in a GFF3 file ends up on the record.
     """
     records = SeqIO.to_dict(GFFParser().parse(self._gff_file))
     assert len(records) == 1
     chr17_rec = records['chr17']
     assert str(chr17_rec.seq) == "GATTACAGATTACA"
开发者ID:jamescasbon,项目名称:bcbb,代码行数:10,代码来源:test_GFFSeqIOFeatureAdder.py

示例8: t_gff_annotations

# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
 def t_gff_annotations(self):
     """Annotations that apply to a whole sequence are stored on the record.
     """
     records = SeqIO.to_dict(GFFParser().parse(self._test_gff_ann_file))
     annotations = records['I'].annotations
     # Exactly two annotation keys are expected: source and expr_profile.
     assert len(annotations.keys()) == 2
     assert annotations['source'] == ['Expr_profile']
     assert annotations['expr_profile'] == ['B0019.1']
开发者ID:JCVI-Cloud,项目名称:VICVB,代码行数:11,代码来源:test_GFFSeqIOFeatureAdder.py

示例9: t_no_dict_error

# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
 def t_no_dict_error(self):
     """A parser created with create_missing=False must raise KeyError when
     no dictionary of records is supplied.
     """
     strict_parser = GFFParser(create_missing=False)
     try:
         for _record in strict_parser.parse(self._test_gff_file):
             pass
     except KeyError:
         # Expected outcome: nothing to map the features onto.
         return
     # no error -- problem
     raise AssertionError('Did not complain with missing dictionary')
开发者ID:JCVI-Cloud,项目名称:VICVB,代码行数:13,代码来源:test_GFFSeqIOFeatureAdder.py

示例10: get_feature_cord

# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def get_feature_cord(gff_file, user_feature="gene"):
    """Collect coordinates of gene-level features from a GFF file.

    Only features of type 'protein', 'gene', 'mRNA', 'CDS' and 'exon' are
    read from the file. The result is a dictionary keyed by gene id whose
    values are lists of tuples; the tuple layout depends on ``user_feature``:

    user_feature = "gene": one tuple (start, end, strand, record id)
        a_dict['FBgn0031208'] = [(7528, 9484, 1, '2L')]
    user_feature = "mRNA": one tuple per mRNA of the gene,
        (start, end, strand, mRNA id, record id)
        a_dict['FBgn0031208'] = [(7528, 9484, 1, 'FBtr0300689', '2L'),
                                 (7528, 9484, 1, 'FBtr0300690', '2L'),
                                 (7528, 9484, 1, 'FBtr0330654', '2L')]
    any other value (e.g. "CDS"): one tuple per mRNA of the gene,
        ([(CDS start, CDS end), ...], strand, mRNA id, record id)
        a_dict['FBgn0031208'] = [
            ([(7679, 8116), (8192, 8610)], 1, 'FBtr0300689', '2L'),
            ([(7679, 8116), (8192, 8589), (8667, 9276)], 1, 'FBtr0300690', '2L'),
            ([(7679, 8116), (8228, 8610)], 1, 'FBtr0330654', '2L')]

    NOTE: 'protein' and 'exon' have no dedicated handling; like every value
    other than "gene" and "mRNA" they produce the CDS layout.

    Args:
        gff_file: path of the GFF file to read.
        user_feature: selects the output layout as described above.

    Returns:
        dict mapping gene id to a list of coordinate tuples.

    Raises:
        AssertionError: with user_feature="gene", when a gene id occurs
            more than once.
    """
    limit_info = dict(gff_type=['protein', 'gene', 'mRNA', 'CDS', 'exon'])
    feature_dict = {}
    parser = GFFParser()
    # The context manager closes the file even when parsing raises.
    with open(gff_file) as in_handle:
        for rec in parser.parse(in_handle, limit_info=limit_info):
            rec_id = rec.id
            for feat in rec.features:
                if feat.type != "gene":
                    continue
                gene_id = feat.id
                if user_feature == "gene":
                    assert gene_id not in feature_dict
                    feature_dict[gene_id] = [(feat.location.start.position,
                                              feat.location.end.position,
                                              feat.strand, rec_id)]
                    continue
                for sub in feat.sub_features:
                    if sub.type != "mRNA":
                        continue
                    if user_feature == "mRNA":
                        info = (sub.location.start.position,
                                sub.location.end.position,
                                sub.strand, sub.id, rec_id)
                    else:
                        # Gather the (start, end) pair of every CDS below
                        # this mRNA, in file order.
                        codons = [(sub_sub.location.start.position,
                                   sub_sub.location.end.position)
                                  for sub_sub in sub.sub_features
                                  if sub_sub.type == "CDS"]
                        info = (codons, sub.strand, sub.id, rec_id)
                    feature_dict.setdefault(gene_id, []).append(info)
    return feature_dict
开发者ID:dwheelerau,项目名称:modules,代码行数:56,代码来源:gff_parsing_tools.py

示例11: extract_seq

# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
def extract_seq(gff_file, outfile):
    """Write every record parsed from a GFF file to a FASTA file.

    Intended for GFF files that carry their sequence data: each record
    yielded by ``GFFParser.parse`` is written to ``outfile`` in FASTA format.

    Args:
        gff_file: path of the GFF file to read.
        outfile: path of the FASTA file to create (overwritten if present).
    """
    parser = GFFParser()
    # Nested context managers close both files even when parsing or
    # writing raises; the input is opened first so a missing input file
    # does not leave an empty output file behind.
    with open(gff_file) as in_handle:
        with open(outfile, "w") as fasta_file:
            for rec in parser.parse(in_handle):
                SeqIO.write(rec, fasta_file, "fasta")
开发者ID:dwheelerau,项目名称:modules,代码行数:13,代码来源:gff_parsing_tools.py

示例12: t_gff3_multiple_ids

# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
 def t_gff3_multiple_ids(self):
     """Deal with GFF3 with non-unique ID attributes, using NCBI example.

     Every top-level feature after the first one of the single parsed
     record is expected to have three sub-features.
     """
     parser = GFFParser()
     rec_dict = SeqIO.to_dict(parser.parse(self._test_ncbi))
     assert len(rec_dict) == 1
     # list(...) keeps the indexing valid where values() is a view.
     only_rec = list(rec_dict.values())[0]
     t_features = only_rec.features[1:]
     # 4 feature sets, same ID, different positions, different attributes
     assert len(t_features) == 4
     for f in t_features:
         assert len(f.sub_features) == 3
开发者ID:JCVI-Cloud,项目名称:VICVB,代码行数:13,代码来源:test_GFFSeqIOFeatureAdder.py

示例13: t_wb_cds_nested_features

# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
 def t_wb_cds_nested_features(self):
     """Nesting of GFF2 features with a flat CDS key value pair.

     The second of the two parsed records is expected to hold a single
     top-level feature with seven sub-features.
     """
     parser = GFFParser()
     rec_dict = SeqIO.to_dict(parser.parse(self._wb_alt_file))
     assert len(rec_dict) == 2
     # list(...) keeps the indexing valid where values() is a view.
     # NOTE(review): picking index 1 depends on dictionary ordering --
     # confirm which record id is meant and look it up by key instead.
     features = list(rec_dict.values())[1].features
     assert len(features) == 1
     tfeature = features[0]
     assert tfeature.id == "cr01.sctg102.wum.2.1"
     assert len(tfeature.sub_features) == 7
开发者ID:jamescasbon,项目名称:bcbb,代码行数:13,代码来源:test_GFFSeqIOFeatureAdder.py

示例14: t_local_map_reduce

# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
 def t_local_map_reduce(self):
     """Parse with type and id limits applied, without parallelization.
     """
     # Keep only gene/mRNA/CDS features located on record 'I'.
     cds_limit_info = {
         "gff_type": ["gene", "mRNA", "CDS"],
         "gff_id": ['I'],
     }
     records = SeqIO.to_dict(
         GFFParser().parse(self._test_gff_file, limit_info=cds_limit_info))
     assert len(records['I'].features) == 32
开发者ID:jamescasbon,项目名称:bcbb,代码行数:14,代码来源:test_GFFSeqIOFeatureAdder.py

示例15: t_jgi_gff

# 需要导入模块: from BCBio.GFF import GFFParser [as 别名]
# 或者: from BCBio.GFF.GFFParser import parse [as 别名]
 def t_jgi_gff(self):
     """JGI-formatted GFF2 is nested via transcriptId and proteinId.
     """
     records = SeqIO.to_dict(GFFParser().parse(self._jgi_file))
     parent = records['chr_1'].features[0]
     # Span and type of the first top-level feature.
     span = parent.location
     assert span.nofuzzy_start == 37060
     assert span.nofuzzy_end == 38216
     assert parent.type == 'inferred_parent'
     children = parent.sub_features
     assert len(children) == 6
     # Qualifiers of the second sub-feature.
     child_quals = children[1].qualifiers
     assert child_quals['proteinId'] == ['873']
     assert child_quals['phase'] == ['0']
开发者ID:jamescasbon,项目名称:bcbb,代码行数:15,代码来源:test_GFFSeqIOFeatureAdder.py


注:本文中的BCBio.GFF.GFFParser.parse方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。