当前位置: 首页>>代码示例>>Python>>正文


Python GFF.GFFParser类代码示例

本文整理汇总了Python中BCBio.GFF.GFFParser的典型用法代码示例。如果您正苦于以下问题:Python GFFParser类的具体用法?Python GFFParser怎么用?Python GFFParser使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了GFFParser类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: not_t_full_celegans

    def not_t_full_celegans(self):
        """Parse the full C. elegans chromosome FASTA and GFF3 files.

        This is used to exercise the GFF parser on large files and is not
        run as a standard test. The WS199 files must be downloaded and the
        paths below adjusted (via ``self._full_dir``) before running it.
        """
        seq_file = os.path.join(self._full_dir, "c_elegans.WS199.dna.fa")
        gff_file = os.path.join(self._full_dir, "c_elegans.WS199.gff3")
        # Read the sequence information; the context manager closes the
        # handle even when FASTA parsing raises.
        with open(seq_file) as seq_handle:
            seq_dict = SeqIO.to_dict(SeqIO.parse(seq_handle, "fasta"))
        # (source, type) pairs handed to the parser as the
        # ``gff_source_type`` entry of ``limit_info``.
        rnai_types = [('Orfeome', 'PCR_product'),
                      ('GenePair_STS', 'PCR_product'),
                      ('Promoterome', 'PCR_product')]
        gene_types = [('Non_coding_transcript', 'gene'),
                      ('Coding_transcript', 'gene'),
                      ('Coding_transcript', 'mRNA'),
                      ('Coding_transcript', 'CDS')]
        limit_info = dict(gff_source_type=rnai_types + gene_types)
        parser = GFFParser()
        # Drain the iterator; nothing is asserted, so the run succeeds as
        # long as parsing completes without raising.
        for rec in parser.parse(gff_file, seq_dict, limit_info=limit_info):
            pass
开发者ID:jamescasbon,项目名称:bcbb,代码行数:27,代码来源:test_GFFSeqIOFeatureAdder.py

示例2: gff_to_bed

def gff_to_bed(gff_file, bed_fh=sys.stdout, cds=True, species=None, rename=False):
    """Write one tab-separated line per selected feature of a GFF file.

    Each output line holds four columns: sequence id, start + 1, end and
    gene name.

    Arguments:
    gff_file -- GFF input, passed unchanged to ``GFFParser.parse``.
    bed_fh -- writable file-like object that receives the output lines.
    cds -- a feature is written when this value equals its "is a gene with
        an mRNA or CDS sub-feature" flag; features of type "chromosome" or
        "protein" are always skipped.
    species -- when not None, the output sequence id is this prefix followed
        by the last two characters of the record id.
    rename -- when true, genes are named ``<seqid>g<NNNNN>`` using a counter
        that restarts at 1 whenever the record id changes; otherwise the
        feature's own id is used.
    """
    parser = GFFParser()
    seqids = parser.parse(gff_file, None)

    cur_chr = None
    cur_gene_order = 0
    for seqid in seqids:
        for feat in seqid.features:
            subf = feat.sub_features
            if feat.type in ("chromosome", "protein"):
                continue
            is_cds = feat.type == "gene" and any(
                f.type in ("mRNA", "CDS") for f in subf
            )
            if cds != is_cds:
                continue

            cur_gene_order += 1
            if species is not None:
                seqid_final = species + seqid.id[-2:]  # hard coded
            else:
                seqid_final = seqid.id

            if rename:
                if seqid.id != cur_chr:
                    # first selected gene on a new record: restart numbering
                    cur_gene_order = 1
                    cur_chr = seqid.id
                gene_name = seqid_final + "g" + str(cur_gene_order).zfill(5)
            else:
                gene_name = feat.id

            # +1 is hard coded to current BCBio.GFF
            columns = (
                seqid_final,
                int(str(feat.location.start)) + 1,
                feat.location.end,
                gene_name,
            )
            # write() instead of the Python 2-only ``print >> fh`` form, so
            # the function behaves the same on Python 2 and Python 3.
            bed_fh.write("\t".join(str(x) for x in columns) + "\n")
开发者ID:Jingping,项目名称:BiteTools,代码行数:30,代码来源:gff_to_bed.py

示例3: t_unknown_seq

 def t_unknown_seq(self):
     """Records parsed without sequence data must report the right lengths.
     """
     parsed = GFFParser().parse(self._test_gff_file)
     by_id = SeqIO.to_dict(parsed)
     # record id -> expected sequence length
     expected_lengths = (("I", 12766937), ("X", 17718531))
     for rec_id, seq_len in expected_lengths:
         assert len(by_id[rec_id].seq) == seq_len
开发者ID:jamescasbon,项目名称:bcbb,代码行数:7,代码来源:test_GFFSeqIOFeatureAdder.py

示例4: t_fasta_directive

 def t_fasta_directive(self):
     """Check the FASTA sequence embedded in a GFF3 file is parsed.
     """
     gff_records = GFFParser().parse(self._gff_file)
     by_id = SeqIO.to_dict(gff_records)
     # exactly one record is expected, keyed by its sequence id
     assert len(by_id) == 1
     embedded = by_id['chr17']
     assert str(embedded.seq) == "GATTACAGATTACA"
开发者ID:jamescasbon,项目名称:bcbb,代码行数:8,代码来源:test_GFFSeqIOFeatureAdder.py

示例5: t_ensembl_nested_features

 def t_ensembl_nested_features(self):
     """Check feature nesting in GFF2 files that use transcript_id.
     """
     parsed = GFFParser().parse(self._ensembl_file)
     by_id = SeqIO.to_dict(parsed)
     top_level = by_id["I"].features
     assert len(top_level) == 2
     # the first top-level feature carries the nested children
     first = top_level[0]
     assert len(first.sub_features) == 32
开发者ID:jamescasbon,项目名称:bcbb,代码行数:8,代码来源:test_GFFSeqIOFeatureAdder.py

示例6: t_gff3_noval_attrib

 def t_gff3_noval_attrib(self):
     """Parse GFF3 file from NCBI with a key/value pair with no value.

     The valueless "pseudo" attribute is expected to show up as the
     qualifier value ``["true"]``.
     """
     parser = GFFParser()
     rec_dict = SeqIO.to_dict(parser.parse(self._test_ncbi))
     assert len(rec_dict) == 1
     # dict.values() is a non-indexable view on Python 3; fetch the single
     # record through an iterator, which works on Python 2 and 3 alike.
     only_rec = next(iter(rec_dict.values()))
     t_feature = only_rec.features[0]
     assert t_feature.qualifiers["pseudo"] == ["true"]
开发者ID:JCVI-Cloud,项目名称:VICVB,代码行数:8,代码来源:test_GFFSeqIOFeatureAdder.py

示例7: t_gff_annotations

 def t_gff_annotations(self):
     """Verify annotations that apply to a whole sequence record.
     """
     parsed = GFFParser().parse(self._test_gff_ann_file)
     final_rec = SeqIO.to_dict(parsed)['I']
     annotations = final_rec.annotations
     assert len(annotations.keys()) == 2
     # annotation key -> expected list of values
     expected = (('source', ['Expr_profile']),
                 ('expr_profile', ['B0019.1']))
     for key, values in expected:
         assert annotations[key] == values
开发者ID:JCVI-Cloud,项目名称:VICVB,代码行数:9,代码来源:test_GFFSeqIOFeatureAdder.py

示例8: t_gff3_iterator

 def t_gff3_iterator(self):
     """Parse a GFF3 file with nested features in parts of ~70 lines.
     """
     chunks = GFFParser().parse_in_parts(self._test_gff_file,
                                         target_lines=70)
     recs = list(chunks)
     # NOTE(review): the earlier comment here expected "one big set because
     # we don't have a good place to split", yet 6 records are asserted --
     # confirm which expectation is current.
     assert len(recs) == 6
     assert len(recs[0].features) == 59
开发者ID:JCVI-Cloud,项目名称:VICVB,代码行数:9,代码来源:test_GFFSeqIOFeatureAdder.py

示例9: t_solid_iterator

 def t_solid_iterator(self):
     """Parse a flat file without nested features in parts of ~5 lines.
     """
     parser = GFFParser()
     chunks = parser.parse_in_parts(self._test_gff_file, target_lines=5)
     # number of features carried by each yielded record
     feature_sizes = [len(rec.features) for rec in chunks]
     assert len(feature_sizes) == 112
     assert max(feature_sizes) == 1
开发者ID:JCVI-Cloud,项目名称:VICVB,代码行数:10,代码来源:test_GFFSeqIOFeatureAdder.py

示例10: t_gff2_iteration

 def t_gff2_iteration(self):
     """Iterate GFF2 features in parts, breaking where no parents are open.
     """
     chunks = GFFParser().parse_in_parts(self._wormbase_file,
                                         target_lines=15)
     recs = list(chunks)
     assert len(recs) == 4
     first_features = recs[0].features
     assert first_features[0].type == 'region'
     assert first_features[1].type == 'SAGE_tag'
     assert len(first_features[2].sub_features) == 29
开发者ID:jamescasbon,项目名称:bcbb,代码行数:11,代码来源:test_GFFSeqIOFeatureAdder.py

示例11: t_wb_cds_nested_features

 def t_wb_cds_nested_features(self):
     """Nesting of GFF2 features with a flat CDS key value pair.
     """
     parser = GFFParser()
     rec_dict = SeqIO.to_dict(parser.parse(self._wb_alt_file))
     assert len(rec_dict) == 2
     # dict.values() cannot be indexed on Python 3, so materialise it first.
     # NOTE(review): picking index 1 relies on the dict's value ordering --
     # confirm the intended record, or look it up by id instead.
     features = list(rec_dict.values())[1].features
     assert len(features) == 1
     tfeature = features[0]
     assert tfeature.id == "cr01.sctg102.wum.2.1"
     assert len(tfeature.sub_features) == 7
开发者ID:jamescasbon,项目名称:bcbb,代码行数:11,代码来源:test_GFFSeqIOFeatureAdder.py

示例12: main

def main(in_file):
    """Convert the GFF file ``in_file`` to GFF3.

    The output is written next to the input, with the input's extension
    replaced by ``.gff3``. The input is parsed in parts (target of 25000
    lines each) and handed to the writer as an iterator.
    """
    base = os.path.splitext(in_file)[0]
    out_file = "%s.gff3" % (base)
    reader = GFFParser()
    writer = GFF3Writer()
    # Context managers close both handles even if parsing or writing raises.
    with open(in_file) as in_handle, open(out_file, "w") as out_handle:
        writer.write(reader.parse_in_parts(in_handle, target_lines=25000),
                     out_handle)
开发者ID:16NWallace,项目名称:bcbb,代码行数:11,代码来源:gff2_to_gff3.py

示例13: t_gff3_multiple_ids

 def t_gff3_multiple_ids(self):
     """Deal with GFF3 with non-unique ID attributes, using NCBI example.
     """
     parser = GFFParser()
     rec_dict = SeqIO.to_dict(parser.parse(self._test_ncbi))
     assert len(rec_dict) == 1
     # dict.values() is a non-indexable view on Python 3; fetch the single
     # record through an iterator, which works on Python 2 and 3 alike.
     only_rec = next(iter(rec_dict.values()))
     # skip the first feature; the remaining ones are under test
     t_features = only_rec.features[1:]
     # 4 feature sets, same ID, different positions, different attributes
     assert len(t_features) == 4
     for f in t_features:
         assert len(f.sub_features) == 3
开发者ID:JCVI-Cloud,项目名称:VICVB,代码行数:11,代码来源:test_GFFSeqIOFeatureAdder.py

示例14: extract_seq

def extract_seq(gff_file, outfile):
    '''Write every record parsed from a GFF file to a new FASTA file.

    Intended for GFF files that have sequence attached.

    gff_file -- path of the GFF file to read.
    outfile -- path of the FASTA file to create (overwritten if present).
    '''
    parser = GFFParser()
    # Context managers close both handles even if parsing or writing raises.
    with open(gff_file) as in_handle, open(outfile, "w") as fasta_file:
        for rec in parser.parse(in_handle):
            SeqIO.write(rec, fasta_file, "fasta")
开发者ID:dwheelerau,项目名称:modules,代码行数:11,代码来源:gff_parsing_tools.py

示例15: t_no_dict_error

 def t_no_dict_error(self):
     """Parsing must raise KeyError when there is no dictionary to map to.
     """
     strict_parser = GFFParser(create_missing=False)
     try:
         for _ in strict_parser.parse(self._test_gff_file):
             pass
     except KeyError:
         # expected outcome: the parser complained
         return
     # no error -- problem
     raise AssertionError('Did not complain with missing dictionary')
开发者ID:JCVI-Cloud,项目名称:VICVB,代码行数:11,代码来源:test_GFFSeqIOFeatureAdder.py


注:本文中的BCBio.GFF.GFFParser类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。