当前位置: 首页>>代码示例>>Python>>正文


Python Multiparser.combine_fasta方法代码示例

本文整理汇总了Python中annogesiclib.multiparser.Multiparser.combine_fasta方法的典型用法代码示例。如果您正苦于以下问题:Python Multiparser.combine_fasta方法的具体用法?Python Multiparser.combine_fasta怎么用?Python Multiparser.combine_fasta使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在annogesiclib.multiparser.Multiparser的用法示例。


在下文中一共展示了Multiparser.combine_fasta方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: sORFDetection

# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_fasta [as 别名]

#.........这里部分代码省略.........
                           os.path.join(args_sorf.wig_path,
                           "_".join([prefix, "forward.wig"])),
                           os.path.join(args_sorf.wig_path,
                           "_".join([prefix, "reverse.wig"])),
                           os.path.join(self.gff_output, self.all_cand,
                           "_".join([prefix, "sORF"])), args_sorf)
            if "_".join([prefix, "sORF_all.gff"]) in os.listdir(
                         os.path.join(self.gff_output, self.all_cand)):
                gff_all = os.path.join(self.gff_output, self.all_cand,
                                       "_".join([prefix, "sORF.gff"]))
                gff_best = os.path.join(self.gff_output, self.best,
                                        "_".join([prefix, "sORF.gff"]))
                csv_all = os.path.join(self.table_output, self.all_cand,
                                       "_".join([prefix, "sORF.csv"]))
                csv_best =  os.path.join(self.table_output, self.best,
                                         "_".join([prefix, "sORF.csv"]))
                shutil.move(os.path.join(self.gff_output, self.all_cand,
                            "_".join([prefix, "sORF_all.gff"])), gff_all)
                shutil.move(os.path.join(self.gff_output, self.all_cand,
                            "_".join([prefix, "sORF_best.gff"])), gff_best)
                shutil.move(os.path.join(self.gff_output, self.all_cand,
                            "_".join([prefix, "sORF_all.csv"])), csv_all)
                shutil.move(os.path.join(self.gff_output, self.all_cand,
                            "_".join([prefix, "sORF_best.csv"])), csv_best)
                log.write("\t" + gff_all + "\n")
                log.write("\t" + gff_best + "\n")
                log.write("\t" + csv_all + "\n")
                log.write("\t" + csv_best + "\n")

    def _remove_tmp(self, args_sorf):
        '''Clean up the temporary data of the sORF detection run.

        Calls ``helper.remove_all_content`` for ".gff" entries of kind
        "file" in ``args_sorf.out_folder``, calls ``helper.remove_tmp_dir``
        on each input folder (fastas, gffs, tsss, trans, srnas) and finally
        deletes the "temp_wig" and "merge_wigs" directories when they are
        listed in the output folder.
        '''
        self.helper.remove_all_content(args_sorf.out_folder, ".gff", "file")
        for input_name in ("fastas", "gffs", "tsss", "trans", "srnas"):
            self.helper.remove_tmp_dir(getattr(args_sorf, input_name))
        # The output folder is re-listed for every name, like the
        # original sequence of checks did.
        for leftover in ("temp_wig", "merge_wigs"):
            if leftover in os.listdir(args_sorf.out_folder):
                shutil.rmtree(os.path.join(args_sorf.out_folder, leftover))

    def _compare_tran_cds(self, args_sorf, log):
        '''Compare transcripts and CDSs to find the intergenic regions.

        For every "*.gff" file in ``args_sorf.gffs``, ``get_intergenic`` is
        called with the annotation file, the matching
        "<prefix>_transcript.gff" in ``self.tran_path`` and the output path
        "<prefix>_inter.gff" in ``args_sorf.out_folder``. The output path
        is reported to ``log``.

        Returns the list of prefixes (file names without ".gff") in the
        order in which the files were listed.
        '''
        log.write("Running sORF_intergenic.py to extract the sequences of "
                  "potential sORFs\n")
        genomes = []
        for gff_name in os.listdir(args_sorf.gffs):
            if not gff_name.endswith(".gff"):
                continue
            genome = gff_name.replace(".gff", "")
            genomes.append(genome)
            print("Comparing transcripts and CDSs of {0}".format(genome))
            annotation = os.path.join(args_sorf.gffs, gff_name)
            transcript = os.path.join(self.tran_path,
                                      genome + "_transcript.gff")
            inter_gff = os.path.join(args_sorf.out_folder,
                                     genome + "_inter.gff")
            get_intergenic(annotation, transcript, inter_gff,
                           args_sorf.utr_detect, args_sorf.hypo,
                           args_sorf.extend_5, args_sorf.extend_3)
            log.write("\t" + inter_gff +
                      " is generated to temporary store the sequences.\n")
        return genomes

    def _re_table(self, args_sorf, prefixs, log):
        '''Run ``reorganize_table`` on the sORF tables of every prefix.

        Both the "all_candidates" and the "best_candidates" table
        ("<prefix>_sORF.csv" under ``args_sorf.out_folder``/tables) are
        passed to ``reorganize_table`` together with the libraries, the
        merged wiggle folder and the column name "Track_detail". Every
        processed table path is written to ``log``.
        '''
        log.write("Running re_table.py for generating coverage information.\n")
        log.write("The following files are updated:\n")
        for candidate_set in ("all_candidates", "best_candidates"):
            for genome in prefixs:
                csv_name = "_".join([genome, "sORF.csv"])
                csv_path = os.path.join(args_sorf.out_folder, "tables",
                                        candidate_set, csv_name)
                reorganize_table(args_sorf.libs, args_sorf.merge_wigs,
                                 "Track_detail", csv_path)
                log.write("\t" + csv_path + "\n")

    def run_sorf_detection(self, args_sorf, log):
        '''Run the sORF detection workflow.

        Exits through ``sys.exit()`` when ``args_sorf.fuzzy_rbs`` is larger
        than 6. Otherwise the input files are checked, the transcript gff
        files and the fasta files are parsed and combined by the
        multiparser, the intergenic regions are extracted, the start/stop
        codon search is run, ``stat`` is called for every "*_sORF.gff" file
        of the all-candidates folder, the tables are reorganized and the
        temporary files are removed.
        '''
        if args_sorf.fuzzy_rbs > 6:
            log.write("--fuzzy_rbs should be equal or less than 6!\n")
            print("Error: --fuzzy_rbs should be equal or less than 6!")
            sys.exit()
        self._check_necessary_files(args_sorf, log)
        parser = self.multiparser
        parser.parser_gff(args_sorf.trans, "transcript")
        parser.combine_gff(args_sorf.gffs, self.tran_path, None, "transcript")
        parser.parser_fasta(args_sorf.fastas)
        parser.combine_fasta(args_sorf.gffs, self.fasta_path, None)
        prefixs = self._compare_tran_cds(args_sorf, log)
        self._start_stop_codon(prefixs, args_sorf, log)
        log.write("Running stat_sorf.py to do statistics.\n")
        for gff_name in os.listdir(
                os.path.join(self.gff_output, self.all_cand)):
            # The progress line is printed for every entry, also for the
            # ones that are skipped below.
            print("Running statistics of {0}".format(gff_name))
            if not gff_name.endswith("_sORF.gff"):
                continue
            csv_name = "_".join(["stat", gff_name.replace(".gff", ".csv")])
            stat_file = os.path.join(args_sorf.out_folder, "statistics",
                                     csv_name)
            all_gff = os.path.join(self.gff_output, self.all_cand, gff_name)
            best_gff = os.path.join(self.gff_output, self.best, gff_name)
            stat(all_gff, best_gff, stat_file, args_sorf.utr_detect)
            log.write("\t" + stat_file + " is generated.\n")
        self._re_table(args_sorf, prefixs, log)
        self._remove_tmp(args_sorf)
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:104,代码来源:sorf.py

示例2: TestMultiparser

# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_fasta [as 别名]
class TestMultiparser(unittest.TestCase):

    def setUp(self):
        '''Create the objects under test and the two working folders.

        "ref_folder" and "tar_folder" are created in the current working
        directory unless a path of that name already exists.
        '''
        self.multiparser = Multiparser()
        self.example = Example()
        for attr in ("ref_folder", "tar_folder"):
            # Attribute name and folder name are the same string.
            setattr(self, attr, attr)
            if not os.path.exists(attr):
                os.mkdir(attr)

    def tearDown(self):
        '''Delete the reference and target folders if they still exist.'''
        for attr in ("ref_folder", "tar_folder"):
            folder = getattr(self, attr)
            if os.path.exists(folder):
                shutil.rmtree(folder)

    def test_combine_fasta(self):
        '''combine_fasta should leave "test.fa" in the target folder.

        Two fasta files are written to the target "tmp" folder and two gff
        files to "test.gff_folder" of the reference folder before
        ``combine_fasta`` is called.
        '''
        tmp_tar = os.path.join(self.tar_folder, "tmp")
        tmp_ref = os.path.join(self.ref_folder, "test.gff_folder")
        for folder in (tmp_ref, tmp_tar):
            os.mkdir(folder)
        # (folder, file name, attribute of the example data to write)
        fixtures = ((tmp_tar, "aaa.fa", "sub_fasta1"),
                    (tmp_tar, "bbb.fa", "sub_fasta2"),
                    (tmp_ref, "aaa.gff", "sub_gff1"),
                    (tmp_ref, "bbb.gff", "sub_gff2"))
        for folder, file_name, attr in fixtures:
            with open(os.path.join(folder, file_name), "w") as handle:
                handle.write(getattr(self.example, attr))
        self.multiparser.combine_fasta(self.ref_folder, tmp_tar, None)
        combined = os.path.join(tmp_tar, "test.fa")
        self.assertTrue(os.path.exists(combined))

    def test_combine_wig(self):
        '''combine_wig should leave the forward and reverse wig files.

        Two fasta files are written to "test.fa_folder" of the reference
        folder and four per-strain wig files to the target "tmp" folder
        before ``combine_wig`` is called with two library descriptions.
        '''
        tmp_tar = os.path.join(self.tar_folder, "tmp")
        tmp_ref = os.path.join(self.ref_folder, "test.fa_folder")
        for folder in (tmp_ref, tmp_tar):
            os.mkdir(folder)
        for file_name, attr in (("aaa.fa", "sub_fasta1"),
                                ("bbb.fa", "sub_fasta2")):
            with open(os.path.join(tmp_ref, file_name), "w") as handle:
                handle.write(getattr(self.example, attr))
        wig_files = [os.path.join(tmp_tar, wig_name) for wig_name in (
            "test_forward.wig_STRAIN_aaa.wig",
            "test_forward.wig_STRAIN_bbb.wig",
            "test_reverse.wig_STRAIN_aaa.wig",
            "test_reverse.wig_STRAIN_bbb.wig")]
        example_wigs = [self.example.sub_f_wig1, self.example.sub_f_wig2,
                        self.example.sub_r_wig1, self.example.sub_r_wig2]
        for wig_file, wig_text in zip(wig_files, example_wigs):
            with open(wig_file, "w") as handle:
                handle.write(wig_text)
        libs = ["test_forward.wig_STRAIN_aaa.wig:frag:1:a:+",
                "test_reverse.wig_STRAIN_aaa.wig:frag:1:a:-"]
        self.multiparser.combine_wig(self.ref_folder, tmp_tar, "fasta", libs)
        for combined in ("test_forward.wig", "test_reverse.wig"):
            self.assertTrue(os.path.exists(os.path.join(tmp_tar, combined)))

    def test_combine_gff(self):
        '''combine_gff should leave "test.gff" in the target folder.

        Two fasta files are written to "test.fa_folder" of the reference
        folder and two gff files to the target "tmp" folder before
        ``combine_gff`` is called.
        '''
        tmp_tar = os.path.join(self.tar_folder, "tmp")
        tmp_ref = os.path.join(self.ref_folder, "test.fa_folder")
        for folder in (tmp_ref, tmp_tar):
            os.mkdir(folder)
        # (folder, file name, attribute of the example data to write)
        fixtures = ((tmp_ref, "aaa.fa", "sub_fasta1"),
                    (tmp_ref, "bbb.fa", "sub_fasta2"),
                    (tmp_tar, "aaa.gff", "sub_gff1"),
                    (tmp_tar, "bbb.gff", "sub_gff2"))
        for folder, file_name, attr in fixtures:
            with open(os.path.join(folder, file_name), "w") as handle:
                handle.write(getattr(self.example, attr))
        self.multiparser.combine_gff(self.ref_folder, tmp_tar, "fasta", None)
        combined = os.path.join(tmp_tar, "test.gff")
        self.assertTrue(os.path.exists(combined))

    def test_parser_fasta(self):
        '''parser_fasta should write per-entry files to two sub folders.

        After parsing "test.fa" of the reference folder, "aaa.fa" and
        "bbb.fa" are expected in both "tmp" and "test.fa_folder".
        '''
        fasta_file = os.path.join(self.ref_folder, "test.fa")
        with open(fasta_file, "w") as handle:
            handle.write(self.example.fasta_file)
        self.multiparser.parser_fasta(self.ref_folder)
        for expected in ("tmp/aaa.fa", "tmp/bbb.fa",
                         "test.fa_folder/aaa.fa", "test.fa_folder/bbb.fa"):
            self.assertTrue(
                os.path.exists(os.path.join(self.ref_folder, expected)))

    def test_parser_gff(self):
        gff_file = os.path.join(self.ref_folder, "test.gff")
        with open(gff_file, "w") as rh:
            rh.write(self.example.gff_file)
        self.multiparser.parser_gff(self.ref_folder, None)
        self.assertTrue(os.path.exists(os.path.join(self.ref_folder, "tmp/aaa.gff")))
#.........这里部分代码省略.........
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:103,代码来源:test_multiparser.py

示例3: sRNADetection

# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_fasta [as 别名]

#.........这里部分代码省略.........
            if (args_srna.tss_folder is None):
                print("Error: lack required TSS files for UTR "
                      "derived sRNA detection!!!!")
                sys.exit()
            if (args_srna.pro_folder is None):
                print("Warning: lack Processing site files for UTR "
                      "derived sRNA detection!!!")
                print("it may effect the results!!!!")
        self._check_gff(args_srna.gffs)
        self._check_gff(args_srna.trans)
        if args_srna.tss_folder is not None:
            self._check_gff(args_srna.tss_folder)
            self.multiparser.parser_gff(args_srna.tss_folder, "TSS")
            self.multiparser.combine_gff(args_srna.gffs, self.tss_path,
                                         None, "TSS")
        if args_srna.pro_folder is not None:
            self._check_gff(args_srna.pro_folder)
            self.multiparser.parser_gff(args_srna.pro_folder, "processing")
            self.multiparser.combine_gff(args_srna.gffs, self.pro_path,
                                         None, "processing")
        if args_srna.sorf_file is not None:
            self._check_gff(args_srna.sorf_file)
            self.multiparser.parser_gff(args_srna.sorf_file, "sORF")
            self.multiparser.combine_gff(args_srna.gffs, self.sorf_path,
                                         None, "sORF")
        if args_srna.utr_srna or ("sec_str" in args_srna.import_info) or (
           "blast_nr" in args_srna.import_info) or (
           "blast_srna" in args_srna.import_info):
            if args_srna.fastas is None:
                print("Error: lack required fasta files for UTR "
                      "derived sRNA detection!!!!")
                sys.exit()
            self.multiparser.parser_fasta(args_srna.fastas)
            self.multiparser.combine_fasta(args_srna.gffs,
                                           self.fasta_path, None)
        if args_srna.terms is not None:
            self._check_gff(args_srna.terms)
            self.multiparser.parser_gff(args_srna.terms, "term")
            self.multiparser.combine_gff(args_srna.gffs, self.term_path,
                                         None, "term")
        else:
            self.term_path = None

    def _run_program(self, args_srna):
        prefixs = []
        tss = None
        for gff in os.listdir(args_srna.gffs):
            if gff.endswith(".gff"):
                prefix = gff.replace(".gff", "")
                prefixs.append(prefix)
                print("Running sRNA detection of {0}....".format(prefix))
                tran = self.helper.get_correct_file(
                        self.tran_path, "_transcript.gff", prefix, None, None)
                gffs = {"merge": "_".join([self.prefixs["merge"], prefix]),
                        "utr": "_".join([self.prefixs["utr"], prefix]),
                        "normal": "_".join([self.prefixs["normal"], prefix])}
                csvs = {"merge": "_".join([
                            self.prefixs["merge_table"], prefix]),
                        "utr": "_".join([self.prefixs["utr_table"], prefix]),
                        "normal": "_".join([
                            self.prefixs["normal_table"], prefix])}
                tss = self._run_normal(
                        prefix, gff, tran, args_srna.fuzzy_tsss["inter"],
                        args_srna)
                if args_srna.utr_srna:
                    print("Running UTR derived sRNA detection of {0}".format(
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:70,代码来源:srna.py

示例4: TSSpredator

# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_fasta [as 别名]

#.........这里部分代码省略.........
                    if tss.endswith("_processing.gff"):
                        ref = self.helper.get_correct_file(
                                args_tss.overlap_gffs, "_TSS.gff",
                                tss.replace("_processing.gff", ""), None, None)
                        filter_tss_pro(os.path.join(out_folder, tss),
                                       ref, args_tss.program,
                                       args_tss.cluster)

    def _low_expression(self, args_tss, gff_folder):
        '''Deal with the low expressed TSSs/processing sites.

        The wiggle files are merged first through ``self._merge_wigs``.
        Then every file of ``gff_folder`` whose name ends with the suffix
        of the running program ("_TSS.gff" for "tss", "_processing.gff"
        for "processing", compared case-insensitively) is handled:

        * ``filter_low_expression`` is called with the gff file and the
          merged wiggle files and writes "tmp/without_low_expression.gff";
        * the returned cutoff is stored in
          "stat_<prefix>_low_expression_cutoff.csv" in the folder
          ``self.stat_outfolder``/<prefix>;
        * the gff file is replaced by the filtered one.

        The statistics file is opened in a ``with`` block, so each file is
        closed as soon as it is written (also when the filtering raises),
        and no close is attempted when no gff file matched. The previous
        implementation closed only the last handle after the loop and
        failed with an unbound variable when nothing matched.

        Args:
            args_tss: argument container; ``wig_folder``, ``libs`` and
                ``program`` are read here and the whole object is passed
                on to ``filter_low_expression``.
            gff_folder: folder holding the gff files to filter in place.
        '''
        self._merge_wigs(args_tss.wig_folder, "wig", args_tss.libs)
        suffix = {"tss": "_TSS.gff",
                  "processing": "_processing.gff"}.get(
                      args_tss.program.lower())
        for gff in os.listdir(gff_folder):
            if suffix is None or not gff.endswith(suffix):
                continue
            prefix = gff.replace(suffix, "")
            if not prefix:
                # A file named exactly like the suffix has no genome name.
                continue
            gff_path = os.path.join(gff_folder, gff)
            stat_path = os.path.join(
                self.stat_outfolder, prefix,
                "_".join(["stat", prefix, "low_expression_cutoff.csv"]))
            with open(stat_path, "w") as out:
                out.write("\t".join(["Genome", "Cutoff_coverage"]) + "\n")
                cutoff = filter_low_expression(
                        gff_path, args_tss,
                        "tmp/merge_forward.wig", "tmp/merge_reverse.wig",
                        "tmp/without_low_expression.gff")
                out.write("\t".join([prefix, str(cutoff)]) + "\n")
            os.remove(gff_path)
            shutil.move("tmp/without_low_expression.gff", gff_path)

    def run_tsspredator(self, args_tss, log):
        '''Run the complete TSS/processing site prediction workflow.

        Steps, in order: check the attributes of every "*.gff" annotation,
        parse fasta, gff and wiggle inputs, generate the config files and
        run the predictor once per prefix (moving "TSSstatistics.tsv" to
        the statistics folder when it was produced), convert the results
        to gff, optionally re-check orphan TSSs, combine the gff files,
        create one statistics folder per result file, optionally filter
        low expressed features and merge manual annotations, handle the
        overlap between TSSs and processing sites, compute statistics,
        optionally validate and compare with transcripts, and remove the
        temporary files.

        ``args_tss.program`` is rewritten from "ps" (any letter case) to
        "processing" as a side effect.
        '''
        config_dir = os.path.join(args_tss.out_folder, "configs")
        for annotation in os.listdir(args_tss.gffs):
            if not annotation.endswith(".gff"):
                continue
            self.helper.check_uni_attributes(
                os.path.join(args_tss.gffs, annotation))
        self.helper.check_make_folder(self.gff_outfolder)
        self.multiparser.parser_fasta(args_tss.fastas)
        self.multiparser.parser_gff(args_tss.gffs, None)
        self.multiparser.parser_wig(args_tss.wig_folder)
        prefixs = self._set_gen_config(args_tss, config_dir, log)
        for prefix in prefixs:
            out_path = os.path.join(self.master, "MasterTable_" + prefix)
            config_file = os.path.join(config_dir,
                                       "config_" + prefix + ".ini")
            self._start_to_run(args_tss.tsspredator_path, config_file,
                               out_path, prefix, log)
            statistics = os.path.join(out_path, "TSSstatistics.tsv")
            if os.path.exists(statistics):
                shutil.move(statistics,
                            os.path.join(self.stat_outfolder,
                                         "TSSstatistics.tsv"))
        if args_tss.program.lower() == "ps":
            args_tss.program = "processing"
        self._convert_gff(prefixs, args_tss, log)
        if args_tss.check_orphan:
            print("checking the orphan TSSs")
            log.write("Running check_orphan.py to re-check orphan TSSs.\n")
            self._check_orphan(prefixs,
                               os.path.join(args_tss.wig_folder, "tmp"),
                               args_tss)
        self.multiparser.combine_gff(args_tss.gffs, self.gff_outfolder,
                                     None, args_tss.program)
        datas = []
        for result in os.listdir(self.gff_outfolder):
            if not result.endswith(".gff"):
                continue
            # Strip "_<program>.gff" to get the genome name.
            genome = result.replace("_" + args_tss.program + ".gff", "")
            self.helper.check_make_folder(
                os.path.join(self.stat_outfolder, genome))
            datas.append(genome)
        if args_tss.remove_low_expression is not None:
            log.write("Running filter_low_expression.py to filter out "
                      "low expressed TSS/PS.\n")
            self._low_expression(args_tss, self.gff_outfolder)
        if args_tss.manual is not None:
            combiner = self.multiparser
            combiner.parser_gff(args_tss.manual, None)
            combiner.combine_gff(args_tss.gffs, self.manual_path, None, None)
            combiner.combine_fasta(args_tss.gffs, self.fasta_path, None)
            combiner.combine_wig(args_tss.gffs, self.wig_path,
                                 None, args_tss.libs)
            log.write("Running merge_manual.py to merge the manual TSSs.\n")
            self._merge_manual(datas, args_tss)
        log.write("Running filter_TSS_pro.py to deal with the overlap "
                  "position between TSS and PS.\n")
        self._deal_with_overlap(self.gff_outfolder, args_tss)
        log.write("Running stat_TSSpredator.py to do statistics.\n")
        self._stat_tss(datas, args_tss.program, log)
        if args_tss.validate:
            self._validate(datas, args_tss, log)
        if args_tss.ta_files is not None:
            self._compare_ta(datas, args_tss, log)
        self._remove_files(args_tss)
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:104,代码来源:tsspredator.py


注:本文中的annogesiclib.multiparser.Multiparser.combine_fasta方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。