

Python Multiparser.parser_gff Method Code Examples

This page collects typical usage examples of the Python method annogesiclib.multiparser.Multiparser.parser_gff. If you are wondering how exactly Multiparser.parser_gff is used, how to call it, or what real-world examples look like, the curated code examples below should help. You can also explore further usage examples of its containing class, annogesiclib.multiparser.Multiparser.


A total of 15 code examples of the Multiparser.parser_gff method are shown below, sorted by popularity by default.
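
Before diving into the examples, here is a minimal sketch of the call pattern that recurs throughout them. It is inferred only from how the examples on this page use the API, not from the library's documentation: parser_gff(folder, type) appears to split the GFF files in a folder into per-genome files under a "tmp" subfolder, named with a "_<type>.gff" suffix (or plain ".gff" when type is None), and combine_gff(ref_folder, tmp_path, ref_type, type) merges them so that each genome in the reference folder gets a matching file. The folder names below are placeholders.

# Minimal sketch; folder names are placeholders and the behaviour described
# in the comments is inferred from the examples on this page.
import os
from annogesiclib.multiparser import Multiparser

multiparser = Multiparser()
gff_folder = "input/gffs"    # genome annotation GFF files
tss_folder = "input/tsss"    # TSS GFF files

# Split the input folders into per-genome files under "<folder>/tmp".
multiparser.parser_gff(gff_folder, None)     # plain annotation GFFs
multiparser.parser_gff(tss_folder, "TSS")    # files ending with "_TSS.gff"

# Merge the parsed TSS files against the genomes found in gff_folder,
# so that "<tss_folder>/tmp/<genome>_TSS.gff" exists for every genome.
tss_tmp = os.path.join(tss_folder, "tmp")
multiparser.combine_gff(gff_folder, tss_tmp, None, "TSS")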

Example 1: OperonDetection

# Required module: from annogesiclib.multiparser import Multiparser [as alias]
# Or: from annogesiclib.multiparser.Multiparser import parser_gff [as alias]
class OperonDetection(object):
    '''detection of operon'''

    def __init__(self, args_op):
        self.multiparser = Multiparser()
        self.helper = Helper()
        if args_op.tsss is not None:
            self.tss_path = os.path.join(args_op.tsss, "tmp")
        else:
            self.tss_path = None
        self.tran_path = os.path.join(args_op.trans, "tmp")
        self.table_path = os.path.join(args_op.output_folder, "tables")
        if args_op.terms is not None:
            self._check_gff(args_op.terms, "term")
            self.term_path = os.path.join(args_op.terms, "tmp")
        else:
            self.term_path = None

    def _check_gff(self, gffs, type_):
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _detect_operon(self, prefixs, args_op, log):
        log.write("Running detect_operon.py to detect operon.\n")
        log.write("The following files are generated:\n")
        for prefix in prefixs:
            out_gff = os.path.join(args_op.output_folder, "gffs",
                                    "_".join([prefix, "operon.gff"]))
            out_table = os.path.join(self.table_path,
                                     "_".join([prefix, "operon.csv"]))
            print("Detecting operons of {0}".format(prefix))
            if self.tss_path is None:
                tss = False
            else:
                tss = self.helper.get_correct_file(
                        self.tss_path, "_TSS.gff", prefix, None, None)
            tran = self.helper.get_correct_file(
                    self.tran_path, "_transcript.gff", prefix, None, None)
            gff = self.helper.get_correct_file(
                    args_op.gffs, ".gff", prefix, None, None)
            if self.term_path is None:
                term = False
            else:
                term = self.helper.get_correct_file(
                        self.term_path, "_term.gff", prefix, None, None)
            operon(tran, tss, gff, term, args_op.tss_fuzzy,
                   args_op.term_fuzzy, args_op.length, out_table, out_gff)
            log.write("\t" + out_table + "\n")
            log.write("\t" + out_gff + "\n")

    def _check_and_parser_gff(self, args_op):
        self._check_gff(args_op.gffs, "gff")
        self._check_gff(args_op.trans, "tran")
        self.multiparser.parser_gff(args_op.gffs, None)
        self.multiparser.parser_gff(args_op.trans, "transcript")
        self.multiparser.combine_gff(args_op.gffs, self.tran_path,
                                     None, "transcript")
        if args_op.tsss is not None:
            self._check_gff(args_op.tsss, "tss")
            self.multiparser.parser_gff(args_op.tsss, "TSS")
            self.multiparser.combine_gff(args_op.gffs, self.tss_path, None, "TSS")
        if args_op.terms is not None:
            self._check_gff(args_op.terms, "term")
            self.multiparser.parser_gff(args_op.terms, "term")
            self.multiparser.combine_gff(args_op.gffs, self.term_path,
                                         None, "term")

    def _stat(self, table_path, stat_folder, log):
        log.write("Running stat_operon.py to do statistics.\n")
        for table in os.listdir(table_path):
            if table.endswith("_operon.csv"):
                filename = "_".join(["stat", table])
                out_stat = os.path.join(stat_folder, filename)
                stat(os.path.join(table_path, table), out_stat)
                log.write("\t" + out_stat + "\n")


    def run_operon(self, args_op, log):
        self._check_and_parser_gff(args_op)
        prefixs = []
        for gff in os.listdir(args_op.gffs):
            if gff.endswith(".gff"):
                prefixs.append(gff.replace(".gff", ""))
        self._detect_operon(prefixs, args_op, log)
        self._stat(self.table_path, args_op.stat_folder, log)
        self.helper.remove_tmp_dir(args_op.gffs)
        self.helper.remove_tmp_dir(args_op.tsss)
        self.helper.remove_tmp_dir(args_op.trans)
        if args_op.terms is not None:
            self.helper.remove_tmp_dir(args_op.terms)
Developer: Sung-Huan, Project: ANNOgesic, Lines of code: 93, Source file: operon.py
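
For orientation, the following is a minimal, hypothetical driver for the OperonDetection class in Example 1. It is not part of ANNOgesic: argparse.Namespace merely stands in for the project's real argument container, the field names are copied from the attributes the class reads above (gffs, trans, tsss, terms, output_folder, stat_folder, tss_fuzzy, term_fuzzy, length), and all paths and numeric values are illustrative placeholders.

# Hypothetical driver sketch (not from the ANNOgesic source).
import argparse

args_op = argparse.Namespace(
    gffs="input/gffs",                      # genome annotation GFF folder
    trans="input/transcripts",              # transcript GFF folder
    tsss="input/tsss",                      # TSS GFF folder (or None)
    terms=None,                             # terminator GFF folder (or None)
    output_folder="output/operons",         # expected to contain "gffs" and "tables"
    stat_folder="output/operons/statistics",
    tss_fuzzy=5, term_fuzzy=30, length=20,  # placeholder tolerances and length
)

# run_operon() only needs an object with a write() method as its log.
with open("operon.log", "w") as log:
    OperonDetection(args_op).run_operon(args_op, log)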

Example 2: OperonDetection

# Required module: from annogesiclib.multiparser import Multiparser [as alias]
# Or: from annogesiclib.multiparser.Multiparser import parser_gff [as alias]
class OperonDetection(object):

    def __init__(self, args_op):
        self.multiparser = Multiparser()
        self.helper = Helper()
        self.tss_path = os.path.join(args_op.tsss, "tmp")
        self.tran_path = os.path.join(args_op.trans, "tmp")
        self.utr5_path = os.path.join(args_op.utr5s, "tmp")
        self.utr3_path = os.path.join(args_op.utr3s, "tmp")
        self.table_path = os.path.join(args_op.output_folder, "tables")
        if args_op.terms is not None:
            self._check_gff(args_op.terms, "term")
            self.term_path = os.path.join(args_op.terms, "tmp")
        else:
            self.term_path = None

    def _check_gff(self, gffs, type_):
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _detect_operon(self, prefixs, args_op):
        for prefix in prefixs:
            out_table = os.path.join(self.table_path,
                                     "_".join(["operon", prefix + ".csv"]))
            print("Detecting operons of {0}".format(prefix))
            tss = self.helper.get_correct_file(
                    self.tss_path, "_TSS.gff", prefix, None, None)
            tran = self.helper.get_correct_file(
                    self.tran_path, "_transcript.gff", prefix, None, None)
            gff = self.helper.get_correct_file(
                    args_op.gffs, ".gff", prefix, None, None)
            if self.term_path is None:
                term = False
            else:
                term = self.helper.get_correct_file(
                        self.term_path, "_term.gff", prefix, None, None)
            operon(tran, tss, gff, term, args_op.tss_fuzzy,
                   args_op.term_fuzzy, args_op.length, out_table)

    def _check_and_parser_gff(self, args_op):
        self._check_gff(args_op.tsss, "tss")
        self._check_gff(args_op.gffs, "gff")
        self._check_gff(args_op.trans, "tran")
        self._check_gff(args_op.utr5s, "utr")
        self._check_gff(args_op.utr3s, "utr")
        self.multiparser.parser_gff(args_op.gffs, None)
        self.multiparser.parser_gff(args_op.tsss, "TSS")
        self.multiparser.combine_gff(args_op.gffs, self.tss_path, None, "TSS")
        self.multiparser.parser_gff(args_op.trans, "transcript")
        self.multiparser.combine_gff(args_op.gffs, self.tran_path,
                                     None, "transcript")
        self.multiparser.parser_gff(args_op.utr5s, "5UTR")
        self.multiparser.combine_gff(args_op.gffs, self.utr5_path,
                                     None, "5UTR")
        self.multiparser.parser_gff(args_op.utr3s, "3UTR")
        self.multiparser.combine_gff(args_op.gffs, self.utr3_path,
                                     None, "3UTR")
        if args_op.terms is not None:
            self._check_gff(args_op.terms, "term")
            self.multiparser.parser_gff(args_op.terms, "term")
            self.multiparser.combine_gff(args_op.gffs, self.term_path,
                                         None, "term")

    def _stat(self, table_path, stat_folder):
        for table in os.listdir(table_path):
            if table.startswith("operon_") and table.endswith(".csv"):
                filename = "_".join(["stat", table])
                out_stat = os.path.join(stat_folder, filename)
                stat(os.path.join(table_path, table), out_stat)

    def _combine_gff(self, prefixs, args_op):
        for prefix in prefixs:
            out_file = os.path.join(args_op.output_folder, "gffs",
                                    "_".join([prefix, "all_features.gff"]))
            print("Combine all features of {0}".format(prefix))
            tss = self.helper.get_correct_file(
                    self.tss_path, "_TSS.gff", prefix, None, None)
            tran = self.helper.get_correct_file(
                    self.tran_path, "_transcript.gff", prefix, None, None)
            gff = self.helper.get_correct_file(
                    args_op.gffs, ".gff", prefix, None, None)
            utr5 = self.helper.get_correct_file(
                    self.utr5_path, "_5UTR.gff", prefix, None, None)
            utr3 = self.helper.get_correct_file(
                    self.utr3_path, "_3UTR.gff", prefix, None, None)
            if self.term_path is None:
                term = None
            else:
                term = self.helper.get_correct_file(
                        self.term_path, "_term.gff", prefix, None, None)
            combine_gff(gff, tran, tss, utr5, utr3, term,
                        args_op.tss_fuzzy, args_op.term_fuzzy, out_file)

    def run_operon(self, args_op):
        self._check_and_parser_gff(args_op)
        prefixs = []
        for gff in os.listdir(args_op.gffs):
            if gff.endswith(".gff"):
                prefixs.append(gff.replace(".gff", ""))
#......... (part of the code omitted) .........
Developer: malvikasharan, Project: ANNOgesic, Lines of code: 103, Source file: operon.py

Example 3: sORFDetection

# Required module: from annogesiclib.multiparser import Multiparser [as alias]
# Or: from annogesiclib.multiparser.Multiparser import parser_gff [as alias]
class sORFDetection(object):
    '''detection of sORF'''

    def __init__(self, args_sorf):
        self.multiparser = Multiparser()
        self.helper = Helper()
        if args_sorf.tsss is not None:
            self.tss_path = os.path.join(args_sorf.tsss, "tmp")
        else:
            self.tss_path = None
        if args_sorf.srnas is not None:
            self.srna_path = os.path.join(args_sorf.srnas, "tmp")
        else:
            self.srna_path = None
        self.gff_output = os.path.join(args_sorf.out_folder, "gffs")
        self.table_output = os.path.join(args_sorf.out_folder, "tables")
        self.tran_path = os.path.join(args_sorf.trans, "tmp")
        self.fasta_path = os.path.join(args_sorf.fastas, "tmp")
        self.all_cand = "all_candidates"
        self.best = "best_candidates"

    def _check_gff(self, gffs):
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _check_necessary_files(self, args_sorf, log):
        if (args_sorf.gffs is None) or (args_sorf.trans is None) or (
               (args_sorf.tex_wigs is None) and (args_sorf.frag_wigs is None)):
            print("Error: lack required files!")
            log.write("genome annotation, transcript file or wiggle files "
                      "are not assigned.\n")
            sys.exit()
        if args_sorf.utr_detect:
            if (args_sorf.tsss is None):
                print("Error: TSS files are required for UTR derived"
                      " sORF detection!")
                log.write("TSS files are required for UTR derived"
                          " sORF detection!\n")
                sys.exit()
        self._check_gff(args_sorf.gffs)
        self.multiparser.parser_gff(args_sorf.gffs, None)
        if args_sorf.tsss is not None:
            self._check_gff(args_sorf.tsss)
            self.multiparser.parser_gff(args_sorf.tsss, "TSS")
            self.multiparser.combine_gff(args_sorf.gffs, self.tss_path,
                                         None, "TSS")
        self._check_gff(args_sorf.trans)
        if args_sorf.srnas is not None:
            self._check_gff(args_sorf.srnas)
            self.multiparser.parser_gff(args_sorf.srnas, "sRNA")
            self.multiparser.combine_gff(args_sorf.gffs, self.srna_path,
                                         None, "sRNA")

    def _start_stop_codon(self, prefixs, args_sorf, log):
        '''detect the sORF based on start and stop codon 
        and ribosome binding site'''
        log.write("Running sORF_detection.py for detecting sORFs.\n")
        log.write("The following files are generated:\n")
        for prefix in prefixs:
            print("Searching sORFs of {0}".format(prefix))
            if self.srna_path is not None:
                srna_file = os.path.join(self.srna_path,
                                         "_".join([prefix, "sRNA.gff"]))
            else:
                srna_file = None
            if self.tss_path is not None:
                tss_file = os.path.join(self.tss_path,
                                        "_".join([prefix, "TSS.gff"]))
            else:
                tss_file = None
            sorf_detection(os.path.join(self.fasta_path, prefix + ".fa"),
                           srna_file, os.path.join(args_sorf.out_folder,
                           "_".join([prefix, "inter.gff"])), tss_file,
                           os.path.join(args_sorf.wig_path,
                           "_".join([prefix, "forward.wig"])),
                           os.path.join(args_sorf.wig_path,
                           "_".join([prefix, "reverse.wig"])),
                           os.path.join(self.gff_output, self.all_cand,
                           "_".join([prefix, "sORF"])), args_sorf)
            if "_".join([prefix, "sORF_all.gff"]) in os.listdir(
                         os.path.join(self.gff_output, self.all_cand)):
                gff_all = os.path.join(self.gff_output, self.all_cand,
                                       "_".join([prefix, "sORF.gff"]))
                gff_best = os.path.join(self.gff_output, self.best,
                                        "_".join([prefix, "sORF.gff"]))
                csv_all = os.path.join(self.table_output, self.all_cand,
                                       "_".join([prefix, "sORF.csv"]))
                csv_best =  os.path.join(self.table_output, self.best,
                                         "_".join([prefix, "sORF.csv"]))
                shutil.move(os.path.join(self.gff_output, self.all_cand,
                            "_".join([prefix, "sORF_all.gff"])), gff_all)
                shutil.move(os.path.join(self.gff_output, self.all_cand,
                            "_".join([prefix, "sORF_best.gff"])), gff_best)
                shutil.move(os.path.join(self.gff_output, self.all_cand,
                            "_".join([prefix, "sORF_all.csv"])), csv_all)
                shutil.move(os.path.join(self.gff_output, self.all_cand,
                            "_".join([prefix, "sORF_best.csv"])), csv_best)
                log.write("\t" + gff_all + "\n")
                log.write("\t" + gff_best + "\n")
#......... (part of the code omitted) .........
Developer: Sung-Huan, Project: ANNOgesic, Lines of code: 103, Source file: sorf.py

Example 4: TranscriptAssembly

# Required module: from annogesiclib.multiparser import Multiparser [as alias]
# Or: from annogesiclib.multiparser.Multiparser import parser_gff [as alias]
class TranscriptAssembly(object):

    def __init__(self, args_tran):
        self.multiparser = Multiparser()
        self.helper = Helper()
        self.converter = Converter()
        self.gff_outfolder = os.path.join(args_tran.out_folder, "gffs")
        self.tran_path = os.path.join(self.gff_outfolder, "tmp")
        self.stat_path = os.path.join(args_tran.out_folder, "statistics")
        self.tmps = {"gff": "tmp.gff", "merge": "tmp_merge",
                     "tran": os.path.join(args_tran.out_folder, "tmp_tran"),
                     "tss_ta": os.path.join(self.gff_outfolder, "tmp_tss_ta"),
                     "ta_tss": os.path.join(self.gff_outfolder, "tmp_ta_tss"),
                     "ta_gff": os.path.join(self.gff_outfolder, "tmp_ta_gff"),
                     "gff_ta": os.path.join(self.gff_outfolder, "tmp_gff_ta"),
                     "uni": os.path.join(self.gff_outfolder, "tmp_uni"),
                     "overlap": os.path.join(
                         self.gff_outfolder, "tmp_overlap")}
        self.frag = "transcript_assembly_fragment.gff"
        self.tex = "transcript_assembly_tex_notex.gff"
        self.endfix_tran = "transcript.gff"

    def _compute_transcript(self, wig_f, wig_r, wig_folder, wig_type, strain,
                            libs, args_tran):
        print("Computing transcript assembly for {0}...".format(strain))
        out = os.path.join(args_tran.out_folder, "_".join([strain, wig_type]))
        assembly(wig_f, wig_r, wig_folder, libs, out, wig_type, args_tran)

    def _compute(self, wig_type, wigs, libs, args_tran):
        strains = []
        wig_folder = os.path.join(wigs, "tmp")
        for wig in os.listdir(wig_folder):
            if wig.endswith("_forward.wig"):
                strains.append(wig.replace("_forward.wig", ""))
        for strain in strains:
            f_file = os.path.join(wig_folder, "_".join(
                [strain, "forward.wig"]))
            r_file = os.path.join(wig_folder, "_".join(
                [strain, "reverse.wig"]))
            self._compute_transcript(f_file, r_file, wigs, wig_type,
                                     strain, libs, args_tran)
        return strains

    def _compare_tss(self, tas, args_tran):
        self.multiparser.parser_gff(args_tran.compare_tss, "TSS")
        self.multiparser.combine_gff(
                self.gff_outfolder,
                os.path.join(args_tran.compare_tss, "tmp"),
                "transcript", "TSS")
        print("Comparing transcript assembly and TSS file...")
        tss_folder = os.path.join(args_tran.compare_tss, "tmp")
        for ta in tas:
            ta_file = os.path.join(self.gff_outfolder,
                                   "_".join([ta, self.endfix_tran]))
            stat_tss_out = os.path.join(
                    self.stat_path, "".join([
                        "stat_compare_Transcriptome_assembly_TSS_",
                        ta, ".csv"]))
            for tss in os.listdir(tss_folder):
                filename = tss.split("_TSS")
                if (filename[0] == ta) and (tss.endswith(".gff")):
                    stat_ta_tss(ta_file, os.path.join(tss_folder, tss),
                                stat_tss_out, self.tmps["ta_tss"],
                                self.tmps["tss_ta"], args_tran.fuzzy)
                    os.remove(ta_file)
                    os.remove(os.path.join(tss_folder, tss))
                    self.helper.sort_gff(self.tmps["ta_tss"], ta_file)
                    self.helper.sort_gff(
                            self.tmps["tss_ta"], os.path.join(
                                args_tran.compare_tss, tss))
                    os.remove(self.tmps["tss_ta"])
                    os.remove(self.tmps["ta_tss"])

    def _compare_cds(self, tas, args_tran):
        self.multiparser.parser_gff(args_tran.compare_cds, None)
        self.multiparser.combine_gff(
            self.gff_outfolder, os.path.join(args_tran.compare_cds, "tmp"),
            "transcript", None)
        print("Comparing transcript assembly and gene...")
        cds_folder = os.path.join(args_tran.compare_cds, "tmp")
        for ta in tas:
            ta_file = os.path.join(self.gff_outfolder,
                                   "_".join([ta, self.endfix_tran]))
            stat_gff_out = os.path.join(self.stat_path, "".join([
                "stat_compare_Transcriptome_assembly_gene_", ta, ".csv"]))
            for gff in os.listdir(cds_folder):
                if (gff[:-4] == ta) and (gff.endswith(".gff")):
                    cds_file = os.path.join(cds_folder, gff)
                    stat_ta_gff(ta_file, cds_file, stat_gff_out,
                                self.tmps["ta_gff"], self.tmps["gff_ta"],
                                args_tran.c_feature)
                    os.remove(ta_file)
                    os.remove(os.path.join(args_tran.compare_cds, gff))
                    self.helper.sort_gff(self.tmps["ta_gff"], ta_file)
                    self.helper.sort_gff(self.tmps["gff_ta"], os.path.join(
                        args_tran.compare_cds, gff))
                    os.remove(self.tmps["ta_gff"])
                    os.remove(self.tmps["gff_ta"])

    def _compare_tss_cds(self, tas, args_tran):
#......... (part of the code omitted) .........
Developer: malvikasharan, Project: ANNOgesic, Lines of code: 103, Source file: transcript.py

Example 5: sRNADetection

# Required module: from annogesiclib.multiparser import Multiparser [as alias]
# Or: from annogesiclib.multiparser.Multiparser import parser_gff [as alias]

#......... (part of the code omitted) .........
            files["frag_gff"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_utr_frag", prefix]))
            files["frag_csv"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_utr_frag_table", prefix]))
            args_srna = self.args_container.container_utrsrna(
                    os.path.join(args_srna.gffs, gff), tran, tss, files,
                    pro, os.path.join(self.fasta_path, prefix + ".fa"),
                    "frag", prefix, args_srna)
            utr_derived_srna(args_srna)
        files["merge_csv"] = "_".join([self.prefixs["utr_table"], prefix])
        files["merge_gff"] = "_".join([self.prefixs["utr"], prefix])
        self._merge_frag_tex_file(files, args_srna)
        filter_utr(files["merge_gff"], files["merge_csv"], args_srna.min_utr)

    def _check_necessary_file(self, args_srna):
        if (args_srna.gffs is None) or (args_srna.trans is None) or (
                (args_srna.tex_wigs is None) and (
                args_srna.frag_wigs is None)):
            print("Error: lack required files!!!!")
            sys.exit()
        if args_srna.utr_srna:
            if (args_srna.tss_folder is None):
                print("Error: lack required TSS files for UTR "
                      "derived sRNA detection!!!!")
                sys.exit()
            if (args_srna.pro_folder is None):
                print("Warning: lack Processing site files for UTR "
                      "derived sRNA detection!!!")
                print("it may affect the results!!!!")
        self._check_gff(args_srna.gffs)
        self._check_gff(args_srna.trans)
        if args_srna.tss_folder is not None:
            self._check_gff(args_srna.tss_folder)
            self.multiparser.parser_gff(args_srna.tss_folder, "TSS")
            self.multiparser.combine_gff(args_srna.gffs, self.tss_path,
                                         None, "TSS")
        if args_srna.pro_folder is not None:
            self._check_gff(args_srna.pro_folder)
            self.multiparser.parser_gff(args_srna.pro_folder, "processing")
            self.multiparser.combine_gff(args_srna.gffs, self.pro_path,
                                         None, "processing")
        if args_srna.sorf_file is not None:
            self._check_gff(args_srna.sorf_file)
            self.multiparser.parser_gff(args_srna.sorf_file, "sORF")
            self.multiparser.combine_gff(args_srna.gffs, self.sorf_path,
                                         None, "sORF")
        if args_srna.utr_srna or ("sec_str" in args_srna.import_info) or (
           "blast_nr" in args_srna.import_info) or (
           "blast_srna" in args_srna.import_info):
            if args_srna.fastas is None:
                print("Error: lack required fasta files for UTR "
                      "derived sRNA detection!!!!")
                sys.exit()
            self.multiparser.parser_fasta(args_srna.fastas)
            self.multiparser.combine_fasta(args_srna.gffs,
                                           self.fasta_path, None)
        if args_srna.terms is not None:
            self._check_gff(args_srna.terms)
            self.multiparser.parser_gff(args_srna.terms, "term")
            self.multiparser.combine_gff(args_srna.gffs, self.term_path,
                                         None, "term")
        else:
            self.term_path = None

    def _run_program(self, args_srna):
        prefixs = []
Developer: malvikasharan, Project: ANNOgesic, Lines of code: 70, Source file: srna.py

Example 6: Terminator

# Required module: from annogesiclib.multiparser import Multiparser [as alias]
# Or: from annogesiclib.multiparser.Multiparser import parser_gff [as alias]
class Terminator(object):

    def __init__(self, args_term):
        self.multiparser = Multiparser()
        self.helper = Helper()
        self.converter = Converter()
        self.gff_parser = Gff3Parser()
        self.gff_path = os.path.join(args_term.gffs, "tmp")
        self.fasta_path = os.path.join(args_term.fastas, "tmp")
        self.tran_path = os.path.join(args_term.trans, "tmp")
        self.outfolder = {"term": os.path.join(args_term.out_folder, "gffs"),
                          "csv": os.path.join(args_term.out_folder, "tables")}
        self.terms = {"all": os.path.join(self.outfolder["term"],
                                          "all_candidates"),
                      "express": os.path.join(self.outfolder["term"],
                                              "express"),
                      "best": os.path.join(self.outfolder["term"], "best"),
                      "non": os.path.join(self.outfolder["term"],
                                          "non_express")}
        self.csvs = {"all": os.path.join(self.outfolder["csv"],
                                         "all_candidates"),
                     "express": os.path.join(self.outfolder["csv"], "express"),
                     "best": os.path.join(self.outfolder["csv"], "best"),
                     "non": os.path.join(self.outfolder["csv"], "non_express")}
        self.combine_path = os.path.join(self.gff_path, "combine")
        self.tmps = {"transterm": os.path.join(os.getcwd(), "tmp_transterm"),
                     "hp": "transtermhp", "hp_gff": "transtermhp.gff",
                     "hp_path": "tmp_transterm/tmp",
                     "term_table": os.path.join(os.getcwd(), "tmp_term_table"),
                     "merge": os.path.join(os.getcwd(), "tmp_merge_gff"),
                     "gff": "tmp.gff",
                     "folder": os.path.join(os.getcwd(), "tmp")}
        self.suffixs = {"gff": "term.gff", "csv": "term.csv",
                        "allgff": "term_all.gff"}
        if args_term.srnas:
            self.srna_path = os.path.join(args_term.srnas, "tmp")
        else:
            self.srna_path = None
        self._make_gff_folder()

    def _combine_annotation(self, combine_file, files):
        with open(combine_file, 'w') as result:
            for file_ in files:
                check_start = False
                fh = open(file_, 'r')
                for line in fh:
                    if check_start:
                        result.write(line)
                    if "Location" in line:
                        check_start = True
                if "\n" not in line:
                    result.write("\n")
                fh.close()

    def _make_gff_folder(self):
        self.helper.check_make_folder(self.terms["all"])
        self.helper.check_make_folder(self.csvs["all"])
        self.helper.check_make_folder(self.terms["best"])
        self.helper.check_make_folder(self.csvs["best"])
        self.helper.check_make_folder(self.terms["express"])
        self.helper.check_make_folder(self.csvs["express"])
        self.helper.check_make_folder(self.terms["non"])
        self.helper.check_make_folder(self.csvs["non"])

    def _convert_gff2rntptt(self, gff_path, fasta_path, sRNAs):
        file_types = {}
        prefixs = []
        for gff in os.listdir(gff_path):
            if gff.endswith(".gff"):
                filename = gff.split("/")
                prefix = filename[-1][:-4]
                prefixs.append(prefix)
                gff_file = os.path.join(gff_path, gff)
                rnt_file = os.path.join(gff_path, gff.replace(".gff", ".rnt"))
                ptt_file = os.path.join(gff_path, gff.replace(".gff", ".ptt"))
                fasta = self.helper.get_correct_file(
                             fasta_path, ".fa", prefix, None, None)
                if not fasta:
                    print("Error: no proper file - {0}.fa".format(prefix))
                    sys.exit()
                if sRNAs:
                    self.multiparser.parser_gff(sRNAs, "sRNA")
                    srna = self.helper.get_correct_file(
                            self.srna_path, "_sRNA.gff", prefix, None, None)
                    if (srna) and (fasta):
                        self.converter.convert_gff2rntptt(
                            gff_file, fasta, ptt_file, rnt_file, srna,
                            srna.replace(".gff", ".rnt"))
                        file_types[prefix] = "srna"
                    if (not srna) and (fasta):
                        self.converter.convert_gff2rntptt(
                            gff_file, fasta, ptt_file, rnt_file, None, None)
                        file_types[prefix] = "normal"
                else:
                    self.converter.convert_gff2rntptt(
                        gff_file, fasta, ptt_file, rnt_file, None, None)
                    file_types[prefix] = "normal"
        return file_types, prefixs

    def _combine_ptt_rnt(self, gff_path, file_types, srna_path):
#......... (part of the code omitted) .........
Developer: malvikasharan, Project: ANNOgesic, Lines of code: 103, Source file: terminator.py

Example 7: MEME

# Required module: from annogesiclib.multiparser import Multiparser [as alias]
# Or: from annogesiclib.multiparser.Multiparser import parser_gff [as alias]

#......... (part of the code omitted) .........
                        fasta.endswith(".fasta")):
                    self.helper.merge_file(os.path.join(
                         args_pro.fastas, fasta), self.all_fasta)
        print("generating fasta file of all fasta files")
        prefixs.append("allfasta")
        input_path = os.path.join(self.out_fasta, "allfasta")
        self.helper.check_make_folder(os.path.join(
                                      args_pro.output_folder, "allfasta"))
        self.helper.check_make_folder(os.path.join(
                                      self.out_fasta, "allfasta"))
        args_pro.source = True
        upstream(self.all_tss, self.all_fasta, None,
                 None, args_pro)
        self._move_and_merge_fasta(input_path, "allfasta")

    def _remove_files(self, args_pro):
        self.helper.remove_tmp(args_pro.fastas)
        self.helper.remove_tmp(args_pro.tsss)
        self.helper.remove_tmp(args_pro.gffs)
        self.helper.remove_tmp(args_pro.wigs)
        if "allfasta.fa" in os.listdir(args_pro.fastas):
            os.remove(self.all_fasta)
        if "allfasta" in os.listdir(os.getcwd()):
            shutil.rmtree("allfasta")
        shutil.rmtree("tmp")

    def _gen_table(self, output_folder, prefixs, combine):
        if combine:
            strains = prefixs + ["allfasta"]
        else:
            strains = prefixs
        for strain in strains:
            for folder in os.listdir(os.path.join(output_folder, strain)):
                tss_file = os.path.join(self.tss_path, strain + "_TSS.gff")
                gen_promoter_table(os.path.join(output_folder, strain,
                                   folder, "meme.txt"),
                                   os.path.join(output_folder, strain,
                                   folder, "meme.csv"), tss_file)

    def _get_upstream(self, args_pro, prefix, tss, fasta):
        if args_pro.source:
            print("generating fasta file of {0}".format(prefix))
            upstream(os.path.join(self.tss_path, tss),
                     os.path.join(args_pro.fastas, fasta),
                     None, None, args_pro)
        else:
            if (args_pro.gffs is None) or (
                    args_pro.wigs is None) or (
                    args_pro.input_libs is None):
                print("Error:please assign proper annotation, tex +/- "
                      "wig folder and tex treated libs!!!")
                sys.exit()
            if "TSS_class" not in os.listdir(args_pro.output_folder):
                os.mkdir(os.path.join(args_pro.output_folder, "TSS_class"))
            print("classifying TSS and extracting fasta {0}".format(prefix))
            upstream(os.path.join(self.tss_path, tss),
                     os.path.join(args_pro.fastas, fasta),
                     os.path.join(self.gff_path, prefix + ".gff"),
                     os.path.join(args_pro.output_folder, "TSS_class",
                     "_".join([prefix, "TSS.gff"])), args_pro)

    def run_meme(self, args_pro):
        if "allfasta.fa" in os.listdir(args_pro.fastas):
            os.remove(self.all_fasta)
            if "allfasta.fa_folder" in os.listdir(args_pro.fastas):
                shutil.rmtree(os.path.join(args_pro.fastas,
                              "allfasta.fa_folder"))
        self.multiparser.parser_fasta(args_pro.fastas)
        self.multiparser.parser_gff(args_pro.tsss, "TSS")
        if "allfasta_TSS.gff" in os.listdir(self.tss_path):
            os.remove(self.all_tss)
        if args_pro.gffs is not None:
            self._check_gff(args_pro.gffs)
            self.multiparser.parser_gff(args_pro.gffs, None)
            self.multiparser.combine_gff(args_pro.fastas, self.gff_path,
                                         "fasta", None)
        self._check_gff(args_pro.tsss)
        self.multiparser.combine_gff(args_pro.fastas, self.tss_path,
                                     "fasta", "TSS")
        self.helper.check_make_folder(self.out_fasta)
        self.helper.check_make_folder(self.tmp_folder)
        prefixs = []
        for tss in os.listdir(self.tss_path):
            prefix = tss.replace("_TSS.gff", "")
            prefixs.append(prefix)
            self.helper.check_make_folder(os.path.join(args_pro.output_folder,
                                                       prefix))
            self.helper.check_make_folder(os.path.join(self.out_fasta,
                                                       prefix))
            input_path = os.path.join(self.out_fasta, prefix)
            fasta = self._get_fasta_file(args_pro.fastas, prefix)
            self._get_upstream(args_pro, prefix, tss, fasta)
            self._move_and_merge_fasta(input_path, prefix)
            self._split_fasta_by_strain(input_path)
        if args_pro.combine:
            self._combine_file(prefixs, args_pro)
        self._run_program(prefixs, args_pro)
        print("generating the table...")
        self._gen_table(args_pro.output_folder, prefixs, args_pro.combine)
        self._remove_files(args_pro)
Developer: malvikasharan, Project: ANNOgesic, Lines of code: 104, Source file: meme.py

Example 8: Ribos

# Required module: from annogesiclib.multiparser import Multiparser [as alias]
# Or: from annogesiclib.multiparser.Multiparser import parser_gff [as alias]

#......... (part of the code omitted) .........
                regenerate_seq(first_scan_file, first_seq,
                               first_table, sec_seq)
                print("scanning of {0}".format(prefix))
                sec_scan_file = self._run_infernal(args_ribo, sec_seq,
                                                   "re_txt", prefix)
                sec_table = os.path.join(
                        self.tmp_files["table"],
                        "_".join([prefix, self.suffixs["re_csv"]]))
                reextract_rbs(sec_scan_file, first_table, sec_table)
                shutil.move(sec_table, first_table)
                modify_table(first_table, args_ribo.output_all)
        return prefixs

    def _merge_results(self, args_ribo):
        for gff in os.listdir(args_ribo.gffs):
            if gff.endswith(".gff"):
                prefix = gff.replace(".gff", "")
                print("Merge results of {0}".format(prefix))
                pre_strain = ""
                self.helper.check_make_folder(os.path.join(
                                              self.scan_folder, prefix))
                fh = open(os.path.join(args_ribo.gffs, gff))
                for entry in self.gff_parser.entries(fh):
                    if entry.seq_id != pre_strain:
                        if len(pre_strain) == 0:
                            shutil.copyfile(os.path.join(
                                self.tmp_files["table"],
                                "_".join([entry.seq_id, self.suffixs["csv"]])),
                                os.path.join(
                                    self.table_folder,
                                    "_".join([prefix, self.suffixs["csv"]])))
                        else:
                            self.helper.merge_file(os.path.join(
                                self.tmp_files["table"],
                                "_".join([entry.seq_id, self.suffixs["csv"]])),
                                os.path.join(
                                    self.table_folder,
                                    "_".join([prefix, self.suffixs["csv"]])))
                        shutil.copy(os.path.join(
                            self.tmp_files["scan"],
                            "_".join([entry.seq_id, self.suffixs["txt"]])),
                            os.path.join(self.scan_folder, prefix))
                        shutil.copy(os.path.join(
                            self.tmp_files["scan"],
                            "_".join([entry.seq_id, self.suffixs["re_txt"]])),
                            os.path.join(self.scan_folder, prefix))
                        pre_strain = entry.seq_id
                out_stat = os.path.join(
                        self.stat_folder,
                        "_".join(["stat", prefix, "riboswitch.txt"]))
                print("compute statistics of {0}".format(prefix))
                stat_and_covert2gff(os.path.join(
                    self.table_folder,
                    "_".join([prefix, self.suffixs["csv"]])),
                    args_ribo.ribos_id, os.path.join(
                        self.gff_outfolder,
                        "_".join([prefix, "riboswitch.gff"])),
                    args_ribo.fuzzy, out_stat)
                fh.close()

    def _remove_tmp(self, args_ribo):
        self.helper.remove_tmp(args_ribo.gffs)
        self.helper.remove_tmp(args_ribo.fastas)
        self.helper.remove_all_content(args_ribo.out_folder, "tmp", "dir")

    def _remove_overlap(self, gff_path):
        for gff in os.listdir(gff_path):
            if gff.endswith(".gff"):
                rbs_overlap(
                    os.path.join(os.path.join(
                        self.tmp_files["table"],
                        "_".join([gff.replace(".gff", ""),
                                  self.suffixs["csv"]]))),
                    os.path.join(gff_path, gff))

    def run_ribos(self, args_ribo):
        if args_ribo.fuzzy_rbs > 6:
            print("Error: --fuzzy_rbs should be equal or less than 6!!")
            sys.exit()
        self.multiparser.parser_gff(args_ribo.gffs, None)
        self.multiparser.parser_fasta(args_ribo.fastas)
        self.multiparser.parser_gff(args_ribo.trans, "transcript")
        self.multiparser.parser_gff(args_ribo.tsss, "TSS")
        for gff in os.listdir(args_ribo.gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(
                                                 args_ribo.gffs, gff))
        rbs_from_rfam(args_ribo.ribos_id, args_ribo.rfam, self.ribos_rfam)
        print("compressing Rfam...")
        call([os.path.join(args_ribo.infernal_path, "cmpress"),
              "-F", self.ribos_rfam])
        prefixs = []
        self.helper.check_make_folder(self.tmp_files["fasta"])
        self.helper.check_make_folder(self.tmp_files["scan"])
        self.helper.check_make_folder(self.tmp_files["table"])
        prefixs = self._scan_extract_rfam(prefixs, args_ribo)
        self._remove_overlap(self.gff_path)
        self._merge_results(args_ribo)
        mapping_ribos(self.table_folder, args_ribo.ribos_id)
        self._remove_tmp(args_ribo)
Developer: malvikasharan, Project: ANNOgesic, Lines of code: 104, Source file: ribos.py

Example 9: TSSpredator

# Required module: from annogesiclib.multiparser import Multiparser [as alias]
# Or: from annogesiclib.multiparser.Multiparser import parser_gff [as alias]

#......... (part of the code omitted) .........
        shutil.rmtree(self.tmps["tss"])

    def _validate(self, tsss, args_tss, log):
        '''validate TSS with genome annotation'''
        print("Validating TSSs with genome annotations")
        log.write("Running validate_gene.py to compare genome "
                  "annotations and TSSs/PSs.\n")
        for tss in tsss:
            for gff in os.listdir(args_tss.gffs):
                if (gff[:-4] == tss) and (".gff" in gff):
                    break
            stat_file = os.path.join(
                    self.stat_outfolder, tss,
                    "".join(["stat_gene_vali_", tss, ".csv"]))
            out_cds_file = os.path.join(args_tss.out_folder, "tmp.gff")
            if args_tss.program.lower() == "tss":
                compare_file = os.path.join(self.gff_outfolder,
                                            "_".join([tss, "TSS.gff"]))
            elif args_tss.program.lower() == "processing":
                compare_file = os.path.join(self.gff_outfolder,
                                            "_".join([tss, "processing.gff"]))
            validate_gff(compare_file, os.path.join(args_tss.gffs, gff),
                         stat_file, out_cds_file, args_tss.utr_length,
                         args_tss.program.lower())
            log.write("\t" + stat_file + " is generated.\n")
            shutil.move(out_cds_file, os.path.join(args_tss.gffs, gff))

    def _compare_ta(self, tsss, args_tss, log):
        '''compare TSS with transcript'''
        detect = False
        log.write("Running stat_TA_comparison to compare transcripts "
                  "and TSSs/PSs.\n")
        print("Comparing transcripts and TSSs")
        self.multiparser.parser_gff(args_tss.ta_files, "transcript")
        self.multiparser.combine_gff(args_tss.gffs, self.tmps["ta"],
                                     None, "transcript")
        for tss in tsss:
            stat_out = os.path.join(
                    self.stat_outfolder, tss, "".join([
                        "stat_compare_TSS_transcript_",
                        tss, ".csv"]))
            for ta in os.listdir(self.tmps["ta"]):
                filename = ta.split("_transcript")
                if (filename[0] == tss) and (filename[1] == ".gff"):
                    detect = True
                    break
            compare_file = os.path.join(self.gff_outfolder,
                                        "_".join([tss, "TSS.gff"]))
            if detect:
                stat_ta_tss(os.path.join(self.tmps["ta"], ta), compare_file,
                            stat_out, self.tmps["ta_tss"],
                            self.tmps["tss_ta"], args_tss.fuzzy)
                self.helper.sort_gff(self.tmps["tss_ta"], compare_file)
                self.helper.sort_gff(self.tmps["ta_tss"],
                                     os.path.join(args_tss.ta_files, ta))
                os.remove(self.tmps["tss_ta"])
                os.remove(self.tmps["ta_tss"])
                detect = False
            log.write("\t" + stat_out + " is generated.\n")

    def _stat_tss(self, tsss, feature, log):
        print("Running statistics")
        for tss in tsss:
            compare_file = os.path.join(self.gff_outfolder,
                                        "_".join([tss, feature]) + ".gff")
            stat_tsspredator(
Developer: Sung-Huan, Project: ANNOgesic, Lines of code: 70, Source file: tsspredator.py

Example 10: CircRNADetection

# Required module: from annogesiclib.multiparser import Multiparser [as alias]
# Or: from annogesiclib.multiparser.Multiparser import parser_gff [as alias]

#......... (part of the code omitted) .........
        for prefix in tmp_prefixs:
            self.helper.check_make_folder(os.path.join(self.gff_folder,
                                                       prefix))
            shutil.copytree(prefix, os.path.join(self.splice_path, prefix))
            self.helper.check_make_folder(os.path.join(
                                          self.candidate_path, prefix))
            print("comparing with annotation of {0}".format(prefix))
            if self.splices["all_file"] in os.listdir(os.path.join(
                                           self.splice_path, prefix)):
                detect_circrna(os.path.join(self.splice_path, prefix,
                               self.splices["all_file"]), os.path.join(
                               self.gff_path, prefix + ".gff"),
                               os.path.join(self.candidate_path, prefix,
                               "_".join(["circRNA", prefix + "_all.csv"])),
                               args_circ, os.path.join(args_circ.stat_folder,
                               "_".join(["stat_circRNA", prefix + ".csv"])))
                self.converter.convert_circ2gff(
                     os.path.join(self.candidate_path, prefix,
                                  "_".join(["circRNA",
                                            prefix + "_all.csv"])),
                     args_circ, os.path.join(
                                self.gff_folder, prefix,
                                "_".join([prefix, "circRNA_all.gff"])),
                     os.path.join(self.gff_folder, prefix,
                                  "_".join([prefix, "circRNA_best.gff"])))

    def _assign_merge_bam(self, args_circ):
        remove_frags = []
        bam_files = []
        if (args_circ.normal_bams is not None) and (
                args_circ.frag_bams is not None):
            for frag in os.listdir(args_circ.frag_bams):
                if frag.endswith(".bam"):
                    shutil.copyfile(os.path.join(args_circ.frag_bams, frag),
                                    os.path.join(args_circ.normal_bams, frag))
                    remove_frags.append(frag)
            merge_folder = args_circ.normal_bams
        elif (args_circ.normal_bams is not None):
            merge_folder = args_circ.normal_bams
        elif (args_circ.frag_bams is not None):
            merge_folder = args_circ.frag_bams
        else:
            print("Error: please assign bam folder or do alignment!!")
            sys.exit()
        for bam in os.listdir(merge_folder):
            if bam.endswith(".bam"):
                bam_files.append(os.path.join(merge_folder, bam))
        return merge_folder, remove_frags, bam_files

    def run_circrna(self, args_circ):
        for gff in os.listdir(args_circ.gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(
                                                 args_circ.gffs, gff))
        if args_circ.segemehl_path is None:
            print("Error: please assign segemehl folder!!")
            sys.exit()
        self.multiparser.parser_gff(args_circ.gffs, None)
        self.multiparser.combine_gff(args_circ.fastas, self.gff_path,
                                     "fasta", None)
        tmp_reads = []
        if args_circ.align:
            self.multiparser.parser_fasta(args_circ.fastas)
            tmp_reads = self._deal_zip_file(args_circ.read_folder)
            align_files, prefixs = self._align(args_circ)
        else:
            self.multiparser.parser_fasta(args_circ.fastas)
            prefixs = []
            for fasta in os.listdir(self.fasta_path):
                fasta_prefix = fasta.replace(".fa", "")
                prefixs.append(fasta_prefix)
            merge_folder, remove_frag, bam_files = self._assign_merge_bam(
                                                   args_circ)
            align_files = None
        for prefix in prefixs:
            if args_circ.align:
                sub_alignment_path = os.path.join(self.alignment_path, prefix)
                bam_files, convert_ones, remove_ones = self._convert_sam2bam(
                    sub_alignment_path, args_circ.samtools_path, align_files)
            else:
                sub_alignment_path = merge_folder
                convert_ones = []
                remove_ones = []
            self._merge_sort_aligment_file(
                bam_files, args_circ.samtools_path, sub_alignment_path,
                convert_ones, tmp_reads, remove_ones)
            self._run_testrealign(prefix, args_circ.segemehl_path,
                                  sub_alignment_path)
        tmp_prefixs = self._merge_bed(args_circ.fastas, self.splice_path)
        self.multiparser.parser_gff(args_circ.gffs, None)
        self.multiparser.combine_gff(args_circ.fastas, self.gff_path,
                                     "fasta", None)
        self._stat_and_gen_gff(tmp_prefixs, args_circ)
        self.helper.remove_tmp(args_circ.fastas)
        self.helper.remove_tmp(args_circ.gffs)
        for tmp_prefix in tmp_prefixs:
            shutil.rmtree(tmp_prefix)
        if (not args_circ.align) and (len(remove_frag) != 0):
            for frag in remove_frag:
                os.remove(os.path.join(merge_folder, frag))
Developer: malvikasharan, Project: ANNOgesic, Lines of code: 104, Source file: circrna.py

Example 11: UTRDetection

# Required module: from annogesiclib.multiparser import Multiparser [as alias]
# Or: from annogesiclib.multiparser.Multiparser import parser_gff [as alias]
class UTRDetection(object):

    def __init__(self, args_utr):
        self.helper = Helper()
        self.multiparser = Multiparser()
        self.tss_path = os.path.join(args_utr.tsss, "tmp")
        self.tran_path = os.path.join(args_utr.trans, "tmp")
        self.utr5_path = os.path.join(args_utr.out_folder, "5UTR")
        self.utr3_path = os.path.join(args_utr.out_folder, "3UTR")
        self.utr5_stat_path = os.path.join(self.utr5_path, "statistics")
        self.utr3_stat_path = os.path.join(self.utr3_path, "statistics")

    def _check_folder(self, folder):
        if folder is None:
            print("Error: lack required files!!!")
            sys.exit()

    def _check_gff(self, folder):
        for gff in os.listdir(folder):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(folder, gff))

    def _compute_utr(self, args_utr):
        for gff in os.listdir(args_utr.gffs):
            if gff.endswith(".gff"):
                prefix = gff[:-4]
                tss = self.helper.get_correct_file(
                        self.tss_path, "_TSS.gff", prefix, None, None)
                tran = self.helper.get_correct_file(
                        self.tran_path, "_transcript.gff", prefix, None, None)
                if args_utr.terms:
                    term = self.helper.get_correct_file(
                                os.path.join(args_utr.terms, "tmp"),
                                "_term.gff", prefix, None, None)
                else:
                    term = None
                print("computing 5'UTR of {0} .....".format(prefix))
                detect_5utr(tss, os.path.join(args_utr.gffs, gff),
                            tran, os.path.join(self.utr5_path, "gffs",
                            "_".join([prefix, "5UTR.gff"])), args_utr)
                print("computing 3'UTR of {0} .....".format(prefix))
                detect_3utr(tran, os.path.join(args_utr.gffs, gff),
                            term, os.path.join(self.utr3_path, "gffs",
                            "_".join([prefix, "3UTR.gff"])), args_utr)
                self.helper.move_all_content(
                    os.getcwd(), self.utr5_stat_path, ["_5utr_length.png"])
                self.helper.move_all_content(
                    os.getcwd(), self.utr3_stat_path, ["_3utr_length.png"])

    def run_utr_detection(self, args_utr):
        self._check_folder(args_utr.tsss)
        self._check_folder(args_utr.gffs)
        self._check_folder(args_utr.trans)
        self._check_gff(args_utr.tsss)
        self._check_gff(args_utr.gffs)
        self._check_gff(args_utr.trans)
        self._check_gff(args_utr.terms)
        self.multiparser.parser_gff(args_utr.gffs, None)
        self.multiparser.parser_gff(args_utr.tsss, "TSS")
        self.multiparser.combine_gff(args_utr.gffs, self.tss_path, None, "TSS")
        self.multiparser.parser_gff(args_utr.trans, "transcript")
        self.multiparser.combine_gff(args_utr.gffs, self.tran_path,
                                     None, "transcript")
        if args_utr.terms:
            self.multiparser.parser_gff(args_utr.terms, "term")
            self.multiparser.combine_gff(args_utr.gffs,
                                         os.path.join(args_utr.terms, "tmp"),
                                         None, "term")
        self._compute_utr(args_utr)
        self.helper.remove_tmp(args_utr.gffs)
        self.helper.remove_tmp(args_utr.tsss)
        self.helper.remove_tmp(args_utr.trans)
        self.helper.remove_tmp(args_utr.terms)
        self.helper.remove_tmp(self.utr5_path)
        self.helper.remove_tmp(self.utr3_path)
Developer: malvikasharan, Project: ANNOgesic, Lines of code: 77, Source file: utr.py

Example 12: TSSpredator

# Required module: from annogesiclib.multiparser import Multiparser [as alias]
# Or: from annogesiclib.multiparser.Multiparser import parser_gff [as alias]

#......... (part of the code omitted) .........
                predict, stat_file,
                os.path.join(self.tmps["tss"], filename),
                os.path.join(args_tss.gffs, gff), args_tss)
            shutil.move(stat_file, os.path.join(args_tss.out_folder,
                                                "statistics", tss, stat_file))
        self.helper.move_all_content(self.tmps["tss"],
                                     self.gff_outfolder, [".gff"])
        shutil.rmtree(self.tmps["tss"])

    def _validate(self, tsss, args_tss):
        print("Running validation of annotation....")
        for tss in tsss:
            for gff in os.listdir(args_tss.gffs):
                if (gff[:-4] == tss) and (".gff" in gff):
                    break
            stat_file = os.path.join(
                    self.stat_outfolder, tss,
                    "".join(["stat_gene_vali_", tss, ".csv"]))
            out_cds_file = os.path.join(args_tss.out_folder, "tmp.gff")
            if args_tss.program.lower() == "tss":
                compare_file = os.path.join(self.gff_outfolder,
                                            "_".join([tss, "TSS.gff"]))
            elif args_tss.program.lower() == "processing":
                compare_file = os.path.join(self.gff_outfolder,
                                            "_".join([tss, "processing.gff"]))
            validate_gff(compare_file, os.path.join(args_tss.gffs, gff),
                         stat_file, out_cds_file, args_tss.utr_length,
                         args_tss.program.lower())
            shutil.move(out_cds_file, os.path.join(args_tss.gffs, gff))

    def _compare_ta(self, tsss, args_tss):
        detect = False
        print("Comparing transcript assembly and TSS ...")
        self.multiparser.parser_gff(args_tss.ta_files, "transcript")
        self.multiparser.combine_gff(args_tss.gffs, self.tmps["ta"],
                                     None, "transcript")
        for tss in tsss:
            stat_out = os.path.join(
                    self.stat_outfolder, tss, "".join([
                        "stat_compare_TSS_Transcriptome_assembly_",
                        tss, ".csv"]))
            for ta in os.listdir(self.tmps["ta"]):
                filename = ta.split("_transcript")
                if (filename[0] == tss) and (filename[1] == ".gff"):
                    detect = True
                    break
            compare_file = os.path.join(self.gff_outfolder,
                                        "_".join([tss, "TSS.gff"]))
            if detect:
                stat_ta_tss(os.path.join(self.tmps["ta"], ta), compare_file,
                            stat_out, self.tmps["ta_tss"],
                            self.tmps["tss_ta"], args_tss.fuzzy)
                self.helper.sort_gff(self.tmps["tss_ta"], compare_file)
                self.helper.sort_gff(self.tmps["ta_tss"],
                                     os.path.join(args_tss.ta_files, ta))
                os.remove(self.tmps["tss_ta"])
                os.remove(self.tmps["ta_tss"])
                detect = False

    def _stat_tss(self, tsss, feature):
        print("Running statistics.....")
        for tss in tsss:
            compare_file = os.path.join(self.gff_outfolder,
                                        "_".join([tss, feature]) + ".gff")
            stat_tsspredator(
                compare_file, feature,
Developer ID: malvikasharan, Project: ANNOgesic, Lines of code: 70, Source file: tsspredator.py
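
For readers who only want the parser_gff call itself, here is a minimal sketch of the parse-and-combine pattern used in _compare_ta above. The folder paths are hypothetical placeholders, and the comments describe behaviour inferred from how the examples consume the results (split files appear in a "tmp" subfolder and carry the feature name as a suffix).

from annogesiclib.multiparser import Multiparser

multiparser = Multiparser()
# Split the multi-genome transcript GFF files into per-genome files;
# the split files are later matched via the "_transcript.gff" suffix.
multiparser.parser_gff("ANNOgesic/output/transcripts/gffs", "transcript")
# Combine the split transcript files so that every genome present in
# the annotation folder gets one merged "<genome>_transcript.gff".
multiparser.combine_gff("ANNOgesic/input/references/annotations",
                        "ANNOgesic/output/transcripts/gffs/tmp",
                        None, "transcript")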

Example 13: GoTermFinding

# Required import: from annogesiclib.multiparser import Multiparser [as alias]
# Or: from annogesiclib.multiparser.Multiparser import parser_gff [as alias]
class GoTermFinding(object):
    '''retrieval of GO terms from UniProt for the annotated CDSs'''

    def __init__(self, args_go):
        self.multiparser = Multiparser()
        self.helper = Helper()
        self.out_all = os.path.join(args_go.out_folder, "all_CDS")
        self.out_express = os.path.join(args_go.out_folder, "expressed_CDS")
        self.result_all_path = os.path.join(self.out_all, "Go_term_results")
        self.result_express_path = os.path.join(self.out_express,
                                                "Go_term_results")
        self.gff_path = os.path.join(args_go.gffs, "tmp")
        if args_go.trans is not None:
            self.tran_path = os.path.join(args_go.trans, "tmp")
        else:
            self.tran_path = None
        self.stat_all_path = os.path.join(self.out_all, "statistics")
        self.stat_express_path = os.path.join(self.out_express,
                                              "statistics")
        self.all_strain = "all_strains_uniprot.csv"

    def _retrieve_go(self, uniprot, out_path, type_):
        prefixs = []
        for gff in os.listdir(self.gff_path):
            prefix = gff.replace(".gff", "")
            prefixs.append(prefix)
            self.helper.check_make_folder(os.path.join(out_path, prefix))
            out_file = os.path.join(out_path, prefix,
                                    "_".join([prefix, "uniprot.csv"]))
            print("extracting Go terms of {0} from UniProt...".format(prefix))
            if self.tran_path is not None:
                tran_file = os.path.join(self.tran_path,
                                         "_".join([prefix, "transcript.gff"]))
            else:
                tran_file = None
            retrieve_uniprot(uniprot, os.path.join(self.gff_path, gff),
                             out_file, tran_file, type_)

    def _merge_files(self, gffs, out_path, out_folder):
        folders = []
        for folder in os.listdir(gffs):
            if folder.endswith("gff_folder"):
                folder_prefix = folder.replace(".gff_folder", "")
                folder_path = os.path.join(out_folder, folder_prefix)
                self.helper.check_make_folder(folder_path)
                folders.append(folder_path)
                filenames = []
                for gff in os.listdir(os.path.join(gffs, folder)):
                    if gff.endswith(".gff"):
                        filenames.append(gff.replace(".gff", ""))
                out_all = os.path.join(folder_path, self.all_strain)
                if len(filenames) > 1:
                    if self.all_strain in os.listdir(folder_path):
                        os.remove(out_all)
                    for filename in filenames:
                        csv_file = "_".join([filename, "uniprot.csv"])
                        self.helper.merge_file(os.path.join(out_path,
                                               filename, csv_file), out_all)
                        shutil.copy(os.path.join(out_path, filename, csv_file),
                                    folder_path)
                else:
                    shutil.copyfile(os.path.join(out_path, filenames[0],
                                    "_".join([filenames[0], "uniprot.csv"])),
                                    out_all)
        self.helper.remove_all_content(out_path, None, "dir")
        self.helper.remove_all_content(out_path, None, "file")
        for folder in folders:
            folder_prefix = folder.split("/")[-1]
            shutil.move(folder, os.path.join(out_path, folder_prefix))

    def _stat(self, out_path, stat_path, go, goslim, out_folder):
        for folder in os.listdir(out_path):
            strain_stat_path = os.path.join(stat_path, folder)
            self.helper.check_make_folder(strain_stat_path)
            fig_path = os.path.join(strain_stat_path, "figs")
            if "fig" not in os.listdir(strain_stat_path):
                os.mkdir(fig_path)
            print("Computing statistics of {0}".format(folder))
            map2goslim(goslim, go,
                       os.path.join(out_path, folder, self.all_strain),
                       os.path.join(strain_stat_path,
                                    "_".join(["stat", folder + ".csv"])),
                       out_folder)
            self.helper.move_all_content(out_folder, fig_path,
                                         ["_three_roots.png"])
            self.helper.move_all_content(out_folder, fig_path,
                                         ["_molecular_function.png"])
            self.helper.move_all_content(out_folder, fig_path,
                                         ["_cellular_component.png"])
            self.helper.move_all_content(out_folder, fig_path,
                                         ["_biological_process.png"])

    def run_go_term(self, args_go):
        for gff in os.listdir(args_go.gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(
                                                 args_go.gffs, gff))
        self.multiparser.parser_gff(args_go.gffs, None)
        if args_go.trans is not None:
            self.multiparser.parser_gff(args_go.trans, "transcript")
        print("Computing all CDS...")
#......... part of the code is omitted here .........
Developer ID: malvikasharan, Project: ANNOgesic, Lines of code: 103, Source file: goterm.py

Example 14: GoTermFinding

# Required import: from annogesiclib.multiparser import Multiparser [as alias]
# Or: from annogesiclib.multiparser.Multiparser import parser_gff [as alias]

#......... part of the code is omitted here .........
                             "Protein_id", "Go_term"]) + "\n")
        for row in csv.reader(fh, delimiter='\t'):
            if row[0] != "Genome":
                out.write("\t".join(row) + "\n")
        out.close()
        fh.close()
        shutil.move(out_all + "_tmp", out_all)

    def _merge_files(self, gffs, out_path, out_folder, log):
        '''merge the files according to the input genome folder'''
        folders = []
        log.write("Merging the output files based on the input genome "
                  "information.\n")
        for folder in os.listdir(gffs):
            if folder.endswith("gff_folder"):
                folder_prefix = folder.replace(".gff_folder", "")
                folder_path = os.path.join(out_folder, folder_prefix)
                self.helper.check_make_folder(folder_path)
                folders.append(folder_path)
                filenames = []
                for gff in os.listdir(os.path.join(gffs, folder)):
                    if gff.endswith(".gff"):
                        filenames.append(gff.replace(".gff", ""))
                out_all = os.path.join(folder_path, self.all_strain)
                if len(filenames) > 1:
                    if self.all_strain in os.listdir(folder_path):
                        os.remove(out_all)
                    for filename in filenames:
                        csv_file = "_".join([filename, "uniprot.csv"])
                        self.helper.merge_file(os.path.join(out_path,
                                               filename, csv_file), out_all)
                        self._remove_header(out_all)
                        shutil.copy(os.path.join(out_path, filename, csv_file),
                                    folder_path)
                else:
                    shutil.copyfile(os.path.join(out_path, filenames[0],
                                    "_".join([filenames[0], "uniprot.csv"])),
                                    out_all)
        self.helper.remove_all_content(out_path, None, "dir")
        self.helper.remove_all_content(out_path, None, "file")
        for folder in folders:
            folder_prefix = folder.split("/")[-1]
            shutil.move(folder, os.path.join(out_path, folder_prefix))
            for file_ in os.listdir(os.path.join(out_path, folder_prefix)):
                log.write("\t" + os.path.join(out_path, folder_prefix, file_) + 
                          " is generated.\n")

    def _stat(self, out_path, stat_path, go, goslim, out_folder, log):
        log.write("Running gene_ontology.py to Retrieve GOslim terms and "
                  "do statistics.\n")
        log.write("The following files are generated:\n")
        for folder in os.listdir(out_path):
            strain_stat_path = os.path.join(stat_path, folder)
            self.helper.check_make_folder(strain_stat_path)
            fig_path = os.path.join(strain_stat_path, "figs")
            if "fig" not in os.listdir(strain_stat_path):
                os.mkdir(fig_path)
            stat_file = os.path.join(strain_stat_path,
                                    "_".join(["stat", folder + ".csv"]))
            map2goslim(goslim, go,
                       os.path.join(out_path, folder, self.all_strain),
                       stat_file, out_folder)
            log.write("\t" + stat_file + "\n")
            self.helper.move_all_content(out_folder, fig_path,
                                         ["_three_roots.png"])
            self.helper.move_all_content(out_folder, fig_path,
                                         ["_molecular_function.png"])
            self.helper.move_all_content(out_folder, fig_path,
                                         ["_cellular_component.png"])
            self.helper.move_all_content(out_folder, fig_path,
                                         ["_biological_process.png"])
            for file_ in os.listdir(fig_path):
                log.write("\t" + os.path.join(fig_path, file_) + "\n")

    def run_go_term(self, args_go, log):
        for gff in os.listdir(args_go.gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(
                                                 args_go.gffs, gff))
        self.multiparser.parser_gff(args_go.gffs, None)
        if args_go.trans is not None:
            self.multiparser.parser_gff(args_go.trans, "transcript")
        print("Computing all CDSs")
        log.write("Retrieving GO terms for all CDSs.\n")
        self._retrieve_go(args_go.uniprot, self.result_all_path, "all", log)
        self._merge_files(args_go.gffs, self.result_all_path, self.out_all, log)
        self._stat(self.result_all_path, self.stat_all_path, args_go.go,
                   args_go.goslim, self.out_all, log)
        if args_go.trans is not None:
            log.write("Retrieving GO terms only for expressed CDSs.\n")
            print("Computing express CDSs")
            self._retrieve_go(args_go.uniprot, self.result_express_path,
                              "express", log)
            self._merge_files(args_go.gffs, self.result_express_path,
                              self.out_express, log)
            self._stat(self.result_express_path, self.stat_express_path,
                       args_go.go, args_go.goslim, self.out_express, log)
        self.helper.remove_tmp_dir(args_go.gffs)
        if args_go.trans is not None:
            self.helper.remove_tmp_dir(args_go.trans)
Developer ID: Sung-Huan, Project: ANNOgesic, Lines of code: 104, Source file: goterm.py
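
As a quick reference for the two parsing modes used in run_go_term, the sketch below calls parser_gff once with None and once with a feature name. The input folders are invented for illustration; the comments state assumptions inferred from how this class reads the results back (per-genome files in a "tmp" subfolder, suffixed with the feature name when one is given).

from annogesiclib.multiparser import Multiparser

multiparser = Multiparser()
# With None, the annotation files are only split per genome into
# "<annotation_folder>/tmp" and keep their plain ".gff" names.
multiparser.parser_gff("ANNOgesic/input/references/annotations", None)
# With a feature name, the split files are suffixed accordingly, e.g.
# "<genome>_transcript.gff", which _retrieve_go later opens as tran_file.
multiparser.parser_gff("ANNOgesic/output/transcripts/gffs", "transcript")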

Example 15: sRNATargetPrediction

# Required import: from annogesiclib.multiparser import Multiparser [as alias]
# Or: from annogesiclib.multiparser.Multiparser import parser_gff [as alias]

#......... part of the code is omitted here .........
            if "_".join([prefix, "RNAup.txt"]) in \
                    os.listdir(os.path.join(self.rnaup_path, prefix)):
                if not args_tar.continue_rnaup:
                    os.remove(out_rnaup)
                    os.remove(out_log)
                else:
                    srnas = self._get_continue(out_rnaup)
            with open(os.path.join(self.srna_seq_path, "_".join([
                    self.tmps["tmp"], prefix, "sRNA.fa"])), "r") as s_f:
                for line in s_f:
                    line = line.strip()
                    if line.startswith(">"):
                        if line[1:] in srnas:
                            start = False
                            continue
                        start = True
                        print("Running RNAup with {0}".format(line[1:]))
                        num_up += 1
                        out_up = open(os.path.join(args_tar.out_folder,
                                      "".join([self.tmps["tmp"],
                                               str(num_up), ".fa"])), "w")
                        out_up.write(line + "\n")
                    else:
                        if start:
                            out_up.write(line + "\n")
                            out_up.close()
                            self.helper.merge_file(os.path.join(
                                self.target_seq_path,
                                "_".join([prefix, "target.fa"])),
                                os.path.join(args_tar.out_folder,
                                             "".join([self.tmps["tmp"],
                                                      str(num_up), ".fa"])))
                            if num_up == args_tar.core_up:
                                self._run_rnaup(num_up, processes,
                                                out_rnaup, out_log, args_tar)
                                processes = []
                                num_up = 0
            self._run_rnaup(num_up, processes, out_rnaup, out_log, args_tar)

    def _merge_rnaplex_rnaup(self, prefixs, args_tar):
        for prefix in prefixs:
            rnaplex_file = None
            rnaup_file = None
            out_rnaplex = None
            out_rnaup = None
            self.helper.check_make_folder(os.path.join(
                                          self.merge_path, prefix))
            print("Ranking {0} now...".format(prefix))
            if (args_tar.program == "both") or (args_tar.program == "RNAplex"):
                rnaplex_file = os.path.join(self.rnaplex_path, prefix,
                                            "_".join([prefix, "RNAplex.txt"]))
                out_rnaplex = os.path.join(
                        self.rnaplex_path, prefix,
                        "_".join([prefix, "RNAplex_rank.csv"]))
            if (args_tar.program == "both") or (args_tar.program == "RNAup"):
                rnaup_file = os.path.join(self.rnaup_path, prefix,
                                          "_".join([prefix, "RNAup.txt"]))
                out_rnaup = os.path.join(self.rnaup_path, prefix,
                                         "_".join([prefix, "RNAup_rank.csv"]))
            merge_srna_target(rnaplex_file, rnaup_file, args_tar,
                              out_rnaplex, out_rnaup,
                              os.path.join(self.merge_path, prefix,
                                           "_".join([prefix, "merge.csv"])),
                              os.path.join(self.merge_path, prefix,
                                           "_".join([prefix, "overlap.csv"])),
                              os.path.join(self.srna_path,
                                           "_".join([prefix, "sRNA.gff"])),
                              os.path.join(self.gff_path, prefix + ".gff"))

    def run_srna_target_prediction(self, args_tar):
        self._check_gff(args_tar.gffs)
        self._check_gff(args_tar.srnas)
        self.multiparser.parser_gff(args_tar.gffs, None)
        self.multiparser.parser_fasta(args_tar.fastas)
        self.multiparser.parser_gff(args_tar.srnas, "sRNA")
        prefixs = []
        self._gen_seq(prefixs, args_tar)
        if (args_tar.program == "both") or (
                args_tar.program == "RNAplex"):
            self._rna_plex(prefixs, args_tar)
        self.helper.remove_all_content(self.target_seq_path,
                                       "_target_", "file")
        if (args_tar.program == "both") or (
                args_tar.program == "RNAup"):
            self._rnaup(prefixs, args_tar)
        self._merge_rnaplex_rnaup(prefixs, args_tar)
        if (args_tar.program == "RNAplex") or (
                args_tar.program == "both"):
            for strain in os.listdir(os.path.join(
                          args_tar.out_folder, "RNAplex")):
                shutil.rmtree(os.path.join(args_tar.out_folder, "RNAplex",
                                           strain, "RNAplfold"))
        self.helper.remove_all_content(args_tar.out_folder,
                                       self.tmps["tmp"], "dir")
        self.helper.remove_all_content(args_tar.out_folder,
                                       self.tmps["tmp"], "file")
        self.helper.remove_tmp(args_tar.gffs)
        self.helper.remove_tmp(args_tar.srnas)
        self.helper.remove_tmp(args_tar.fastas)
        self.helper.remove_all_content(self.srna_seq_path, "tmp_", "file")
Developer ID: malvikasharan, Project: ANNOgesic, Lines of code: 104, Source file: srna_target.py
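
Finally, a short sketch of the parsing step that opens run_srna_target_prediction: annotations, FASTA sequences and sRNA predictions are all split per genome before target prediction starts. The argument paths below are placeholders, not taken from the original example.

from annogesiclib.multiparser import Multiparser

multiparser = Multiparser()
# Split the genome annotation GFF files (no feature suffix).
multiparser.parser_gff("ANNOgesic/input/references/annotations", None)
# Split the genome FASTA files the same way.
multiparser.parser_fasta("ANNOgesic/input/references/fasta")
# Split the sRNA predictions; the resulting "<genome>_sRNA.gff" files
# are read back later by _merge_rnaplex_rnaup.
multiparser.parser_gff("ANNOgesic/output/sRNAs/gffs", "sRNA")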


Note: The annogesiclib.multiparser.Multiparser.parser_gff method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; the copyright of the source code belongs to the original authors. Please refer to the corresponding project's license before distributing or using the code, and do not reproduce this article without permission.