当前位置: 首页>>代码示例>>Python>>正文


Python Helper.get_correct_file方法代码示例

本文整理汇总了Python中annogesiclib.helper.Helper.get_correct_file方法的典型用法代码示例。如果您正苦于以下问题:Python Helper.get_correct_file方法的具体用法?Python Helper.get_correct_file怎么用?Python Helper.get_correct_file使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在annogesiclib.helper.Helper的用法示例。


在下文中一共展示了Helper.get_correct_file方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: TestHelper

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class TestHelper(unittest.TestCase):
    """Exercise ``Helper`` against fixtures written to a scratch folder."""

    def setUp(self):
        """Create ``test_folder`` and write the GFF and FASTA fixtures."""
        self.example = ExampleData()
        self.helper = Helper()
        self.gff_out = self.example.gff_out
        self.rev_seq = self.example.rev_seq.replace("\n", "")
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)
        self.gff_file = os.path.join(self.test_folder, "test.gff")
        with open(self.gff_file, "w") as gff_handle:
            gff_handle.write(self.example.gff_file)
        self.seq_file = os.path.join(self.test_folder, "test.fa")
        with open(self.seq_file, "w") as seq_handle:
            seq_handle.write(self.example.seq)

    def tearDown(self):
        """Delete the scratch folder when it is still present."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def test_remove_all_content(self):
        """Files and folders matching the tag are removed by their type."""
        tmp_file = os.path.join(self.test_folder, "tmp1.gff")
        tmp_dir = os.path.join(self.test_folder, "tmp2")
        shutil.copyfile(self.gff_file, tmp_file)
        os.mkdir(tmp_dir)
        # First pass targets files only, so the folder has to survive it.
        self.helper.remove_all_content(self.test_folder, "tmp", "file")
        self.assertFalse(os.path.exists(tmp_file))
        self.assertTrue(os.path.exists(tmp_dir))
        # Second pass targets folders; the fixture GFF must stay untouched.
        self.helper.remove_all_content(self.test_folder, "tmp", "dir")
        self.assertFalse(os.path.exists(tmp_dir))
        self.assertTrue(os.path.exists(self.gff_file))

    def test_remove_tmp(self):
        """``remove_tmp`` deletes both ``tmp`` and ``*_folder`` folders."""
        leftovers = [os.path.join(self.test_folder, name)
                     for name in ("tmp", "test.gff_folder")]
        for folder in leftovers:
            os.mkdir(folder)
        self.helper.remove_tmp(self.test_folder)
        for folder in leftovers:
            self.assertFalse(os.path.exists(folder))

    def test_get_correct_file(self):
        """The GFF file is returned for the prefix despite wig look-alikes."""
        gff_file = os.path.join(self.test_folder, "test.gff")
        wig_names = ("test_forward.wig_STRAIN_aaa.wig",
                     "test_reverse.wig_STRAIN_aaa.wig")
        for wig_name in wig_names:
            shutil.copyfile(gff_file,
                            os.path.join(self.test_folder, wig_name))
        libs = ["test_forward.wig_STRAIN_aaa.wig:frag:1:a:+",
                "test_reverse.wig_STRAIN_aaa.wig:frag:1:a:-"]
        found = self.helper.get_correct_file(
            self.test_folder, ".gff", "test", None, libs)
        self.assertEqual(found, gff_file)

    def test_sorf_gff(self):
        """``sort_gff`` writes the same records as the expected output."""
        sorted_file = os.path.join(self.test_folder, "test.out")
        self.helper.sort_gff(self.gff_file, sorted_file)
        records = import_data(sorted_file)
        expected = self.gff_out.split("\n")
        self.assertEqual(set(records), set(expected))

    def test_extract_gene(self):
        """Forward and reverse-strand extraction return expected bases."""
        genome = self.example.seq.replace("\n", "")
        forward = self.helper.extract_gene(genome, 1, 70, "+")
        self.assertEqual(
            forward,
            "CGCAGGTTGAGTTCCTGTTCCCGATAGATCCGATAAACCCGCTTATGATTCCAGAGCTGTCCCTGCACAT")
        reverse = self.helper.extract_gene(genome, 1, 140, "-")
        self.assertEqual(reverse, self.rev_seq)

    def test_get_seq(self):
        """``get_seq`` emits one FASTA record for a single-feature GFF."""
        gff_file = os.path.join(self.test_folder, "test.gff")
        cds_file = os.path.join(self.test_folder, "test.cds")
        gff_lines = self.example.gff_out.split("\n")
        # Overwrite the fixture so it holds only the second expected line.
        with open(gff_file, "w") as handle:
            handle.write(gff_lines[1])
        self.helper.get_seq(self.gff_file, self.seq_file, cds_file)
        records = import_data(cds_file)
        self.assertEqual(set(records), {">cds0|aaa|1|10|+", "CGCAGGTTGA"})
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:81,代码来源:test_helper.py

示例2: Terminator

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class Terminator(object):

    def __init__(self, args_term):
        """Build collaborators and derive every path used by the pipeline.

        Reads ``gffs``, ``fastas``, ``trans``, ``out_folder`` and ``srnas``
        from ``args_term`` and finishes by creating the output sub-folders.
        """
        self.multiparser = Multiparser()
        self.helper = Helper()
        self.converter = Converter()
        self.gff_parser = Gff3Parser()
        self.gff_path = os.path.join(args_term.gffs, "tmp")
        self.fasta_path = os.path.join(args_term.fastas, "tmp")
        self.tran_path = os.path.join(args_term.trans, "tmp")
        gff_root = os.path.join(args_term.out_folder, "gffs")
        table_root = os.path.join(args_term.out_folder, "tables")
        self.outfolder = {"term": gff_root, "csv": table_root}
        # Both output trees share the same category -> sub-folder layout.
        categories = (("all", "all_candidates"), ("express", "express"),
                      ("best", "best"), ("non", "non_express"))
        self.terms = {key: os.path.join(gff_root, name)
                      for key, name in categories}
        self.csvs = {key: os.path.join(table_root, name)
                     for key, name in categories}
        self.combine_path = os.path.join(self.gff_path, "combine")
        cwd = os.getcwd()
        self.tmps = {"transterm": os.path.join(cwd, "tmp_transterm"),
                     "hp": "transtermhp",
                     "hp_gff": "transtermhp.gff",
                     "hp_path": "tmp_transterm/tmp",
                     "term_table": os.path.join(cwd, "tmp_term_table"),
                     "merge": os.path.join(cwd, "tmp_merge_gff"),
                     "gff": "tmp.gff",
                     "folder": os.path.join(cwd, "tmp")}
        self.suffixs = {"gff": "term.gff", "csv": "term.csv",
                        "allgff": "term_all.gff"}
        self.srna_path = (os.path.join(args_term.srnas, "tmp")
                          if args_term.srnas else None)
        self._make_gff_folder()

    def _combine_annotation(self, combine_file, files):
        with open(combine_file, 'w') as result:
            for file_ in files:
                check_start = False
                fh = open(file_, 'r')
                for line in fh:
                    if check_start:
                        result.write(line)
                    if "Location" in line:
                        check_start = True
                if "\n" not in line:
                    result.write("\n")
                fh.close()

    def _make_gff_folder(self):
        self.helper.check_make_folder(self.terms["all"])
        self.helper.check_make_folder(self.csvs["all"])
        self.helper.check_make_folder(self.terms["best"])
        self.helper.check_make_folder(self.csvs["best"])
        self.helper.check_make_folder(self.terms["express"])
        self.helper.check_make_folder(self.csvs["express"])
        self.helper.check_make_folder(self.terms["non"])
        self.helper.check_make_folder(self.csvs["non"])

    def _convert_gff2rntptt(self, gff_path, fasta_path, sRNAs):
        file_types = {}
        prefixs = []
        for gff in os.listdir(gff_path):
            if gff.endswith(".gff"):
                filename = gff.split("/")
                prefix = filename[-1][:-4]
                prefixs.append(prefix)
                gff_file = os.path.join(gff_path, gff)
                rnt_file = os.path.join(gff_path, gff.replace(".gff", ".rnt"))
                ptt_file = os.path.join(gff_path, gff.replace(".gff", ".ptt"))
                fasta = self.helper.get_correct_file(
                             fasta_path, ".fa", prefix, None, None)
                if not fasta:
                    print("Error: no proper file - {0}.fa".format(prefix))
                    sys.exit()
                if sRNAs:
                    self.multiparser.parser_gff(sRNAs, "sRNA")
                    srna = self.helper.get_correct_file(
                            self.srna_path, "_sRNA.gff", prefix, None, None)
                    if (srna) and (fasta):
                        self.converter.convert_gff2rntptt(
                            gff_file, fasta, ptt_file, rnt_file, srna,
                            srna.replace(".gff", ".rnt"))
                        file_types[prefix] = "srna"
                    if (not srna) and (fasta):
                        self.converter.convert_gff2rntptt(
                            gff_file, fasta, ptt_file, rnt_file, None, None)
                        file_types[prefix] = "normal"
                else:
                    self.converter.convert_gff2rntptt(
                        gff_file, fasta, ptt_file, rnt_file, None, None)
                    file_types[prefix] = "normal"
        return file_types, prefixs

    def _combine_ptt_rnt(self, gff_path, file_types, srna_path):
#.........这里部分代码省略.........
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:103,代码来源:terminator.py

示例3: RATT

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class RATT(object):

    def __init__(self, args_ratt):
        """Create collaborators and derive the working paths from the args.

        Reads ``ref_embls``, ``output_path``, ``tar_fastas``,
        ``ref_fastas`` and ``gff_outfolder`` from ``args_ratt``.
        """
        self.multiparser = Multiparser()
        self.converter = Converter()
        self.format_fixer = FormatFixer()
        self.helper = Helper()
        embl_root = args_ratt.ref_embls
        self.gbk = os.path.join(embl_root, "gbk_tmp")
        self.gbk_tmp = os.path.join(self.gbk, "tmp")
        self.embl = os.path.join(embl_root, "embls")
        self.ratt_log = os.path.join(args_ratt.output_path, "ratt_log.txt")
        gff_out = args_ratt.gff_outfolder
        self.tmp_files = {
            "tar": os.path.join(args_ratt.tar_fastas, "tmp"),
            "ref": os.path.join(args_ratt.ref_fastas, "tmp"),
            "out_gff": os.path.join(gff_out, "tmp"),
            "gff": os.path.join(gff_out, "tmp.gff"),
            "ptt": os.path.join(gff_out, "tmp.ptt"),
            "rnt": os.path.join(gff_out, "tmp.rnt"),
        }

    def _convert_to_pttrnt(self, gffs, files):
        for gff in files:
            if gff.endswith(".gff"):
                gff = os.path.join(gffs, gff)
                filename = gff.split("/")
                prefix = filename[-1][:-4]
                rnt = gff[:-3] + "rnt"
                ptt = gff[:-3] + "ptt"
                fasta = self.helper.get_correct_file(self.tmp_files["tar"],
                                                     ".fa", prefix, None, None)
                if fasta:
                    self.converter.convert_gff2rntptt(gff, fasta, ptt, rnt,
                                                      None, None)

    def _remove_files(self, args_ratt, out_gbk):
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".gff", "file")
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".ptt", "file")
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".rnt", "file")
        self.helper.move_all_content(self.tmp_files["out_gff"],
                                     args_ratt.gff_outfolder, None)
        shutil.rmtree(self.tmp_files["out_gff"])
        shutil.rmtree(self.tmp_files["tar"])
        shutil.rmtree(self.tmp_files["ref"])
        shutil.rmtree(self.embl)
        self.helper.remove_all_content(args_ratt.tar_fastas, "_folder", "dir")
        self.helper.remove_all_content(args_ratt.ref_fastas, "_folder", "dir")
        if out_gbk:
            shutil.rmtree(out_gbk)

    def _convert_to_gff(self, ratt_result, args_ratt, files):
        name = ratt_result.split(".")
        filename = ".".join(name[1:-2]) + ".gff"
        output_file = os.path.join(args_ratt.output_path, filename)
        self.converter.convert_embl2gff(
             os.path.join(args_ratt.output_path, ratt_result), output_file)
        self.format_fixer.fix_ratt(output_file, ".".join(name[1:-2]),
                                   "tmp_gff")
        shutil.move("tmp_gff", output_file)
        shutil.copy(output_file, os.path.join(args_ratt.gff_outfolder,
                                              filename))
        files.append(filename)

    def _parser_embl_gbk(self, files):
        self.helper.check_make_folder(self.gbk)
        for file_ in files:
            close = False
            with open(file_, "r") as f_h:
                for line in f_h:
                    if (line.startswith("LOCUS")):
                        out = open(self.gbk_tmp, "w")
                        datas = line.split(" ")
                        for data in datas:
                            if (len(data) != 0) and (data != "LOCUS"):
                                filename = ".".join([data, "gbk"])
                                break
                    elif (line.startswith("VERSION")):
                        datas = line.split(" ")
                        for data in datas:
                            if (len(data) != 0) and (data != "VERSION"):
                                new_filename = ".".join([data, "gbk"])
                                break
                        if new_filename.find(filename):
                            filename = new_filename
                    if out:
                        out.write(line)
                    if line.startswith("//"):
                        out.close()
                        close = True
                        shutil.move(self.gbk_tmp,
                                    os.path.join(self.gbk, filename))
            if not close:
                out.close()
        return self.gbk

    def _convert_embl(self, ref_embls):
        detect_gbk = False
        gbks = []
        out_gbk = None
#.........这里部分代码省略.........
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:103,代码来源:ratt.py

示例4: sRNADetection

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]

#.........这里部分代码省略.........
        if database_type == "sRNA":
            change_format(database, "tmp_srna_database")
            os.remove(database)
            shutil.move("tmp_srna_database", database)
        db_file = ".".join(database.split(".")[:-1])
        self._run_format(blast_path, database, type_, db_file, err)
        err.close()

    def _merge_frag_tex_file(self, files, args_srna):
        if (args_srna.frag_wigs is not None) and (
                args_srna.tex_wigs is not None):
            self.helper.merge_file(files["frag_gff"], files["tex_gff"])
            self.helper.merge_file(files["frag_csv"], files["tex_csv"])
            shutil.move(files["tex_csv"], files["merge_csv"])
            self.helper.sort_gff(files["tex_gff"], files["merge_gff"])
            os.remove(files["frag_csv"])
            os.remove(files["frag_gff"])
            os.remove(files["tex_gff"])
        elif (args_srna.frag_wigs is not None):
            shutil.move(files["frag_csv"], files["merge_csv"])
            self.helper.sort_gff(files["frag_gff"], files["merge_gff"])
            os.remove(files["frag_gff"])
        elif (args_srna.tex_wigs is not None):
            shutil.move(files["tex_csv"], files["merge_csv"])
            self.helper.sort_gff(files["tex_gff"], files["merge_gff"])

    def _run_normal(self, prefix, gff, tran, fuzzy_tss, args_srna):
        """Detect intergenic sRNAs per library type and merge the results.

        A leftover ``tmp_cutoff_inter`` file in the output folder is removed
        first. For each library type with wig data, the argument container
        is rebuilt and ``intergenic_srna`` is run; the per-type outputs are
        merged afterwards.
        """
        if "tmp_cutoff_inter" in os.listdir(args_srna.out_folder):
            os.remove(os.path.join(args_srna.out_folder, "tmp_cutoff_inter"))
        files = dict.fromkeys(("frag_gff", "frag_csv", "tex_gff", "tex_csv",
                               "merge_gff", "merge_csv"))
        tss = None
        if "tss" in args_srna.import_info:
            tss = self.helper.get_correct_file(self.tss_path, "_TSS.gff",
                                               prefix, None, None)
        pro = None
        if self.pro_path is not None:
            pro = self.helper.get_correct_file(
                    self.pro_path, "_processing.gff", prefix, None, None)
        # (library type, wig attribute, GFF temp tag, table temp tag)
        libraries = (("frag", "frag_wigs", "tmp_frag", "tmp_frag_table"),
                     ("tex", "tex_wigs", "tmp_tex", "tmp_tex_table"))
        for lib, wig_attr, gff_tag, csv_tag in libraries:
            # Looked up on the current args_srna, which the container call
            # below rebinds on every pass.
            if getattr(args_srna, wig_attr) is None:
                continue
            files[lib + "_gff"] = os.path.join(
                    args_srna.out_folder, "_".join([gff_tag, prefix]))
            files[lib + "_csv"] = os.path.join(
                    args_srna.out_folder, "_".join([csv_tag, prefix]))
            args_srna = self.args_container.container_intersrna(
                    lib, files, args_srna, prefix,
                    os.path.join(args_srna.gffs, gff), tran, tss,
                    pro, fuzzy_tss)
            intergenic_srna(args_srna)
        files["merge_csv"] = "_".join([self.prefixs["normal_table"], prefix])
        files["merge_gff"] = "_".join([self.prefixs["normal"], prefix])
        self._merge_frag_tex_file(files, args_srna)
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:70,代码来源:srna.py

示例5: OperonDetection

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class OperonDetection(object):
    '''detection of operon'''

    def __init__(self, args_op):
        '''Create collaborators and derive the temporary input folders.

        TSS and terminator folders are optional; their paths stay None
        when the matching argument is None.
        '''
        self.multiparser = Multiparser()
        self.helper = Helper()
        if args_op.tsss is not None:
            self.tss_path = os.path.join(args_op.tsss, "tmp")
        else:
            self.tss_path = None
        self.tran_path = os.path.join(args_op.trans, "tmp")
        self.table_path = os.path.join(args_op.output_folder, "tables")
        if args_op.terms is not None:
            self._check_gff(args_op.terms, "term")
            self.term_path = os.path.join(args_op.terms, "tmp")
        else:
            self.term_path = None

    def _check_gff(self, gffs, type_):
        '''Run the helper's attribute check on every .gff file in gffs.

        type_ is accepted for the callers' benefit but is not used here.
        '''
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _detect_operon(self, prefixs, args_op, log):
        '''Run operon detection for each prefix and log the output files.'''
        log.write("Running detect_operon.py to detect operon.\n")
        log.write("The following files are generated:\n")
        for prefix in prefixs:
            out_gff = os.path.join(args_op.output_folder, "gffs",
                                   "_".join([prefix, "operon.gff"]))
            out_table = os.path.join(self.table_path,
                                     "_".join([prefix, "operon.csv"]))
            print("Detecting operons of {0}".format(prefix))
            # False (not None) marks an input type that was not supplied.
            if self.tss_path is None:
                tss = False
            else:
                tss = self.helper.get_correct_file(
                        self.tss_path, "_TSS.gff", prefix, None, None)
            tran = self.helper.get_correct_file(
                    self.tran_path, "_transcript.gff", prefix, None, None)
            gff = self.helper.get_correct_file(
                    args_op.gffs, ".gff", prefix, None, None)
            if self.term_path is None:
                term = False
            else:
                term = self.helper.get_correct_file(
                        self.term_path, "_term.gff", prefix, None, None)
            operon(tran, tss, gff, term, args_op.tss_fuzzy,
                   args_op.term_fuzzy, args_op.length, out_table, out_gff)
            log.write("\t" + out_table + "\n")
            log.write("\t" + out_gff + "\n")

    def _check_and_parser_gff(self, args_op):
        '''Check the input GFF files, then parse and combine them.

        TSS and terminator inputs are processed only when supplied.
        '''
        self._check_gff(args_op.gffs, "gff")
        self._check_gff(args_op.trans, "tran")
        self.multiparser.parser_gff(args_op.gffs, None)
        self.multiparser.parser_gff(args_op.trans, "transcript")
        self.multiparser.combine_gff(args_op.gffs, self.tran_path,
                                     None, "transcript")
        if args_op.tsss is not None:
            self._check_gff(args_op.tsss, "tss")
            self.multiparser.parser_gff(args_op.tsss, "TSS")
            self.multiparser.combine_gff(args_op.gffs, self.tss_path,
                                         None, "TSS")
        if args_op.terms is not None:
            self._check_gff(args_op.terms, "term")
            self.multiparser.parser_gff(args_op.terms, "term")
            self.multiparser.combine_gff(args_op.gffs, self.term_path,
                                         None, "term")

    def _stat(self, table_path, stat_folder, log):
        '''Write a stat_<table> file for every *_operon.csv table.'''
        log.write("Running stat_operon.py to do statistics.\n")
        for table in os.listdir(table_path):
            if table.endswith("_operon.csv"):
                filename = "_".join(["stat", table])
                out_stat = os.path.join(stat_folder, filename)
                stat(os.path.join(table_path, table), out_stat)
                log.write("\t" + out_stat + "\n")

    def run_operon(self, args_op, log):
        '''Run the whole workflow: check, detect, statistics, clean up.'''
        self._check_and_parser_gff(args_op)
        prefixs = []
        for gff in os.listdir(args_op.gffs):
            if gff.endswith(".gff"):
                prefixs.append(gff.replace(".gff", ""))
        self._detect_operon(prefixs, args_op, log)
        self._stat(self.table_path, args_op.stat_folder, log)
        self.helper.remove_tmp_dir(args_op.gffs)
        # TSS input is optional everywhere else in this class, so guard
        # the cleanup the same way as the terminator folder below.
        if args_op.tsss is not None:
            self.helper.remove_tmp_dir(args_op.tsss)
        self.helper.remove_tmp_dir(args_op.trans)
        if args_op.terms is not None:
            self.helper.remove_tmp_dir(args_op.terms)
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:93,代码来源:operon.py

示例6: OperonDetection

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class OperonDetection(object):

    def __init__(self, args_op):
        """Create collaborators and derive the temporary input folders.

        The terminator folder is optional: when ``args_op.terms`` is None
        no check is run and ``term_path`` stays None.
        """
        self.multiparser = Multiparser()
        self.helper = Helper()
        self.tss_path = os.path.join(args_op.tsss, "tmp")
        self.tran_path = os.path.join(args_op.trans, "tmp")
        self.utr5_path = os.path.join(args_op.utr5s, "tmp")
        self.utr3_path = os.path.join(args_op.utr3s, "tmp")
        self.table_path = os.path.join(args_op.output_folder, "tables")
        if args_op.terms is None:
            self.term_path = None
        else:
            self._check_gff(args_op.terms, "term")
            self.term_path = os.path.join(args_op.terms, "tmp")

    def _check_gff(self, gffs, type_):
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _detect_operon(self, prefixs, args_op):
        """Run ``operon`` for each prefix, writing ``operon_<prefix>.csv``."""
        for prefix in prefixs:
            def locate(folder, suffix):
                # Look up the file of the current prefix in ``folder``.
                return self.helper.get_correct_file(
                    folder, suffix, prefix, None, None)

            out_table = os.path.join(self.table_path,
                                     "_".join(["operon", prefix + ".csv"]))
            print("Detection operons of {0}".format(prefix))
            tss = locate(self.tss_path, "_TSS.gff")
            tran = locate(self.tran_path, "_transcript.gff")
            gff = locate(args_op.gffs, ".gff")
            # False (not None) marks terminators that were not supplied.
            term = False
            if self.term_path is not None:
                term = locate(self.term_path, "_term.gff")
            operon(tran, tss, gff, term, args_op.tss_fuzzy,
                   args_op.term_fuzzy, args_op.length, out_table)

    def _check_and_parser_gff(self, args_op):
        self._check_gff(args_op.tsss, "tss")
        self._check_gff(args_op.gffs, "gff")
        self._check_gff(args_op.trans, "tran")
        self._check_gff(args_op.utr5s, "utr")
        self._check_gff(args_op.utr3s, "utr")
        self.multiparser.parser_gff(args_op.gffs, None)
        self.multiparser.parser_gff(args_op.tsss, "TSS")
        self.multiparser.combine_gff(args_op.gffs, self.tss_path, None, "TSS")
        self.multiparser.parser_gff(args_op.trans, "transcript")
        self.multiparser.combine_gff(args_op.gffs, self.tran_path,
                                     None, "transcript")
        self.multiparser.parser_gff(args_op.utr5s, "5UTR")
        self.multiparser.combine_gff(args_op.gffs, self.utr5_path,
                                     None, "5UTR")
        self.multiparser.parser_gff(args_op.utr3s, "3UTR")
        self.multiparser.combine_gff(args_op.gffs, self.utr3_path,
                                     None, "3UTR")
        if args_op.terms is not None:
            self._check_gff(args_op.terms, "term")
            self.multiparser.parser_gff(args_op.terms, "term")
            self.multiparser.combine_gff(args_op.gffs, self.term_path,
                                         None, "term")

    def _stat(self, table_path, stat_folder):
        """Write a ``stat_<table>`` file for every ``operon_*.csv`` table."""
        for table in os.listdir(table_path):
            if not (table.startswith("operon_") and table.endswith(".csv")):
                continue
            out_stat = os.path.join(stat_folder, "_".join(["stat", table]))
            stat(os.path.join(table_path, table), out_stat)

    def _combine_gff(self, prefixs, args_op):
        """Merge all feature files of each prefix into one GFF file.

        The result is written to ``<output_folder>/gffs`` as
        ``<prefix>_all_features.gff``.
        """
        for prefix in prefixs:
            def locate(folder, suffix):
                # Look up the file of the current prefix in ``folder``.
                return self.helper.get_correct_file(
                    folder, suffix, prefix, None, None)

            out_file = os.path.join(args_op.output_folder, "gffs",
                                    "_".join([prefix, "all_features.gff"]))
            print("Combine all features of {0}".format(prefix))
            tss = locate(self.tss_path, "_TSS.gff")
            tran = locate(self.tran_path, "_transcript.gff")
            gff = locate(args_op.gffs, ".gff")
            utr5 = locate(self.utr5_path, "_5UTR.gff")
            utr3 = locate(self.utr3_path, "_3UTR.gff")
            term = None
            if self.term_path is not None:
                term = locate(self.term_path, "_term.gff")
            combine_gff(gff, tran, tss, utr5, utr3, term,
                        args_op.tss_fuzzy, args_op.term_fuzzy, out_file)

    def run_operon(self, args_op):
        self._check_and_parser_gff(args_op)
        prefixs = []
        for gff in os.listdir(args_op.gffs):
            if gff.endswith(".gff"):
                prefixs.append(gff.replace(".gff", ""))
#.........这里部分代码省略.........
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:103,代码来源:operon.py

示例7: SubLocal

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class SubLocal(object):

    def __init__(self, args_sub):
        """Create collaborators, derive all paths and make output folders.

        ``tran_path`` stays None when no transcript folder is supplied.
        """
        self.multiparser = Multiparser()
        self.helper = Helper()
        self.fixer = FormatFixer()
        self.gff_path = os.path.join(args_sub.gffs, "tmp")
        self.fasta_path = os.path.join(args_sub.fastas, "tmp")
        self.tran_path = (os.path.join(args_sub.trans, "tmp")
                          if args_sub.trans is not None else None)
        out_all = os.path.join(args_sub.out_folder, "all_CDS")
        out_express = os.path.join(args_sub.out_folder, "expressed_CDS")
        self.out_all = out_all
        self.out_express = out_express
        # Both result trees use the same sub-folder names.
        self.all_tmp_path = os.path.join(out_all, "tmp")
        self.express_tmp_path = os.path.join(out_express, "tmp")
        self.all_stat_path = os.path.join(out_all, "statistics")
        self.express_stat_path = os.path.join(out_express, "statistics")
        self.all_tmp_result = os.path.join(out_all, "tmp_results")
        self.express_tmp_result = os.path.join(out_express, "tmp_results")
        self.all_result = os.path.join(out_all, "psortb_results")
        self.express_result = os.path.join(out_express, "psortb_results")
        self.endfix_table = "table.csv"
        self.endfix_raw = "raw.txt"
        self._make_folder()

    def _make_folder(self):
        self.helper.check_make_folder(self.out_all)
        self.helper.check_make_folder(self.out_express)
        self.helper.check_make_folder(self.all_stat_path)
        self.helper.check_make_folder(self.express_stat_path)
        self.helper.check_make_folder(self.all_result)
        self.helper.check_make_folder(self.express_result)

    def _compare_cds_tran(self, gff_file, tran_file):
        """Write the CDSs that overlap a transcript to ``tmp_cds.gff``.

        A CDS is kept when a transcript on the same strand and sequence
        overlaps it, contains it or is contained by it. Each CDS is written
        at most once, to ``<out_all>/tmp_cds.gff``.

        Args:
            gff_file: annotation GFF file; only ``CDS`` entries are used.
            tran_file: transcript GFF file.
        """
        out_path = os.path.join(self.out_all, "tmp_cds.gff")
        # Context managers close all three files even if parsing fails.
        with open(out_path, "w") as out, \
                open(gff_file) as fh, open(tran_file) as th:
            cdss = [entry for entry in Gff3Parser().entries(fh)
                    if entry.feature == "CDS"]
            trans = list(Gff3Parser().entries(th))
            for cds in cdss:
                for ta in trans:
                    if (cds.strand != ta.strand) or (
                            cds.seq_id != ta.seq_id):
                        continue
                    if ((cds.end < ta.end) and (
                             cds.end > ta.start) and (
                             cds.start <= ta.start)) or (
                            (cds.start > ta.start) and (
                             cds.start < ta.end) and (
                             cds.end >= ta.end)) or (
                            (cds.end >= ta.end) and (
                             cds.start <= ta.start)) or (
                            (cds.end <= ta.end) and (
                             cds.start >= ta.start)):
                        out.write(cds.info + "\n")
                        # One matching transcript is enough for this CDS.
                        break

    def _get_protein_seq(self, gff, tmp_path, tran_path):
        prefix = gff.replace(".gff", "")
        fasta = self.helper.get_correct_file(self.fasta_path, ".fa",
                                             prefix, None, None)
        dna_seq_file = os.path.join(tmp_path, "_".join([prefix, "dna.fa"]))
        print("Generate CDS fasta files of {0}".format(prefix))
        if tran_path is not None:
            self._compare_cds_tran(os.path.join(self.gff_path, gff),
                                   os.path.join(tran_path, "_".join([
                                       prefix, "transcript.gff"])))
            self.helper.get_cds_seq(os.path.join(self.out_all, "tmp_cds.gff"),
                                    fasta, dna_seq_file)
            os.remove(os.path.join(self.out_all, "tmp_cds.gff"))
        else:
            self.helper.get_cds_seq(os.path.join(self.gff_path, gff),
                                    fasta, dna_seq_file)
        print("transfer DNA seq to protein seq of {0}".format(prefix))
        self.helper.translation(dna_seq_file, "tmp")
        prot_seq_file = os.path.join(
                tmp_path, "_".join([prefix, "protein.fa"]))
        self.fixer.fix_emboss("tmp", prot_seq_file)
        os.remove("tmp")
        return prefix

    def _psortb(self, psortb_path, strain_type, prot_seq_file,
                out_raw, out_err):
        call([psortb_path, strain_type, prot_seq_file],
             stdout=out_raw, stderr=out_err)

    def _run_psortb(self, args_sub, prefix, out_folder, tmp_path, tmp_result):
        print("Running psortb of {0}".format(prefix))
        out_err = open(os.path.join(out_folder, "tmp_log"), "w")
        out_raw = open(os.path.join(tmp_result,
                       "_".join([prefix, self.endfix_raw])), "w")
        prot_seq_file = os.path.join(tmp_path,
#.........这里部分代码省略.........
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:103,代码来源:sublocal.py

示例8: Terminator

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class Terminator(object):
    '''detection of terminator'''

    def __init__(self, args_term):
        '''Create the helper objects and every input, output and
        temporary path used during the terminator detection, then
        make sure the output folders exist.'''
        self.multiparser = Multiparser()
        self.helper = Helper()
        self.converter = Converter()
        self.gff_parser = Gff3Parser()
        # the inputs are read from the "tmp" sub-folder of each argument
        self.gff_path = os.path.join(args_term.gffs, "tmp")
        self.fasta_path = os.path.join(args_term.fastas, "tmp")
        self.tran_path = os.path.join(args_term.trans, "tmp")
        out_root = args_term.out_folder
        self.outfolder = {"term": os.path.join(out_root, "gffs"),
                          "csv": os.path.join(out_root, "tables")}
        # the gff and the table output share the same four sub-folders
        candidate_dirs = (("all", "all_candidates"),
                          ("express", "expressed_candidates"),
                          ("best", "best_candidates"),
                          ("non", "non_expressed_candidates"))
        self.terms = {key: os.path.join(self.outfolder["term"], name)
                      for key, name in candidate_dirs}
        self.csvs = {key: os.path.join(self.outfolder["csv"], name)
                     for key, name in candidate_dirs}
        self.combine_path = os.path.join(self.gff_path, "combine")
        cwd = os.getcwd()
        self.tmps = {"transterm": os.path.join(cwd, "tmp_transterm"),
                     "hp": "transtermhp", "hp_gff": "transtermhp.gff",
                     "hp_path": "tmp_transterm/tmp",
                     "term_table": os.path.join(cwd, "tmp_term_table"),
                     "merge": os.path.join(cwd, "tmp_merge_gff"),
                     "gff": "tmp.gff",
                     "folder": os.path.join(cwd, "tmp")}
        self.suffixs = {"gff": "term.gff", "csv": "term.csv",
                        "allgff": "term_all.gff"}
        # sRNA input is optional
        self.srna_path = (os.path.join(args_term.srnas, "tmp")
                          if args_term.srnas else None)
        self._make_gff_folder()

    def _combine_annotation(self, combine_file, files):
        with open(combine_file, 'w') as result:
            for file_ in files:
                if (file_.endswith(".ptt")) and (os.stat(file_).st_size == 0):
                    print("Warning: No CDS information, "
                          "TransTermHP can not work!")
                    return "NO_CDS"
                if os.path.exists(file_) and (
                        os.stat(file_).st_size != 0):
                    check_start = False
                    fh = open(file_, 'r')
                    for line in fh:
                        if check_start:
                            result.write(line)
                        if "Location" in line:
                            check_start = True
                    if "\n" not in line:
                        result.write("\n")
                    fh.close()
        return "Normal"

    def _make_gff_folder(self):
        self.helper.check_make_folder(self.terms["all"])
        self.helper.check_make_folder(self.csvs["all"])
        self.helper.check_make_folder(self.terms["best"])
        self.helper.check_make_folder(self.csvs["best"])
        self.helper.check_make_folder(self.terms["express"])
        self.helper.check_make_folder(self.csvs["express"])
        self.helper.check_make_folder(self.terms["non"])
        self.helper.check_make_folder(self.csvs["non"])

    def _convert_gff2rntptt(self, gff_path, fasta_path, sRNAs, log):
        file_types = {}
        prefixs = []
        for gff in os.listdir(gff_path):
            if gff.endswith(".gff"):
                filename = gff.split("/")
                prefix = filename[-1][:-4]
                prefixs.append(prefix)
                gff_file = os.path.join(gff_path, gff)
                rnt_file = os.path.join(gff_path, gff.replace(".gff", ".rnt"))
                ptt_file = os.path.join(gff_path, gff.replace(".gff", ".ptt"))
                fasta = self.helper.get_correct_file(
                             fasta_path, ".fa", prefix, None, None)
                if not fasta:
                    log.write("{0}.fa can not be found.\n".format(prefix))
                    print("Error: {0}.fa can not be found!".format(prefix))
                    sys.exit()
                if sRNAs:
                    self.multiparser.parser_gff(sRNAs, "sRNA")
                    srna = self.helper.get_correct_file(
                            self.srna_path, "_sRNA.gff", prefix, None, None)
                    if (srna) and (fasta):
                        log.write("Running converter.py to convert {0} and "
                                  "{1} to {2}, {3}, and {4}.\n".format(
#.........这里部分代码省略.........
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:103,代码来源:terminator.py

示例9: SubLocal

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class SubLocal(object):
    '''detection of subcellular localization'''

    def __init__(self, args_sub):
        '''Create the helper objects and the input/output paths of the
        subcellular localization prediction, then make sure the output
        folders exist.'''
        self.multiparser = Multiparser()
        self.helper = Helper()
        self.fixer = FormatFixer()
        self.gff_path = os.path.join(args_sub.gffs, "tmp")
        self.fasta_path = os.path.join(args_sub.fastas, "tmp")
        # the transcript input is optional
        self.tran_path = (os.path.join(args_sub.trans, "tmp")
                          if args_sub.trans is not None else None)
        # results for all CDSs
        all_root = os.path.join(args_sub.out_folder, "all_CDSs")
        self.out_all = all_root
        self.all_tmp_path = os.path.join(all_root, "tmp")
        self.all_stat_path = os.path.join(all_root, "statistics")
        self.all_tmp_result = os.path.join(all_root, "tmp_results")
        self.all_result = os.path.join(all_root, "psortb_results")
        # results for the expressed CDSs only
        express_root = os.path.join(args_sub.out_folder, "expressed_CDSs")
        self.out_express = express_root
        self.express_tmp_path = os.path.join(express_root, "tmp")
        self.express_stat_path = os.path.join(express_root, "statistics")
        self.express_tmp_result = os.path.join(express_root, "tmp_results")
        self.express_result = os.path.join(express_root, "psortb_results")
        self.endfix_table = "table.csv"
        self.endfix_raw = "raw.txt"
        self._make_folder()

    def _make_folder(self):
        self.helper.check_make_folder(self.out_all)
        self.helper.check_make_folder(self.out_express)
        self.helper.check_make_folder(self.all_stat_path)
        self.helper.check_make_folder(self.express_stat_path)
        self.helper.check_make_folder(self.all_result)
        self.helper.check_make_folder(self.express_result)

    def _compare_cds_tran(self, gff_file, tran_file, log):
        '''compare CDS and transcript to find the expressed CDS

        Every CDS entry of gff_file that overlaps a transcript of
        tran_file on the same strand and the same sequence is written
        once to "tmp_cds.gff" inside self.out_all. Progress messages
        are written to log.
        '''
        log.write("Comparing transcripts and CDSs to get expressed CDSs.\n")
        cds_out = os.path.join(self.out_all, "tmp_cds.gff")
        # one "with" statement guarantees that all three files are
        # closed even when parsing or writing raises
        with open(cds_out, "w") as out, open(gff_file) as fh, \
                open(tran_file) as th:
            cdss = [entry for entry in Gff3Parser().entries(fh)
                    if entry.feature == "CDS"]
            trans = list(Gff3Parser().entries(th))
            for cds in cdss:
                for ta in trans:
                    if (cds.strand != ta.strand) or (
                            cds.seq_id != ta.seq_id):
                        continue
                    # CDS ends inside the transcript, starts at/before it
                    over_start = (cds.end < ta.end) and (
                        cds.end > ta.start) and (cds.start <= ta.start)
                    # CDS starts inside the transcript, ends at/after it
                    over_end = (cds.start > ta.start) and (
                        cds.start < ta.end) and (cds.end >= ta.end)
                    # CDS covers the whole transcript
                    covers = (cds.end >= ta.end) and (cds.start <= ta.start)
                    # CDS lies within the transcript
                    inside = (cds.end <= ta.end) and (cds.start >= ta.start)
                    if over_start or over_end or covers or inside:
                        out.write(cds.info + "\n")
                        # one matching transcript is enough
                        break
        log.write("\t" + cds_out + " is "
                  "temporary generated.\n")

    def _get_protein_seq(self, gff, tmp_path, tran_path, args_sub, log):
        prefix = gff.replace(".gff", "")
        fasta = self.helper.get_correct_file(self.fasta_path, ".fa",
                                             prefix, None, None)
        dna_seq_file = os.path.join(tmp_path, "_".join([prefix, "dna.fa"]))
        print("Generating CDS fasta files of {0}".format(prefix))
        if tran_path is not None:
            log.write("Predicting subcellular localization for expressed "
                      "CDSs for {0}.\n".format(prefix))
            self._compare_cds_tran(os.path.join(self.gff_path, gff),
                                   os.path.join(tran_path, "_".join([
                                       prefix, "transcript.gff"])), log)
            log.write("Running helper.py to extract sequences for CDSs.\n")
            self.helper.get_cds_seq(os.path.join(self.out_all, "tmp_cds.gff"),
                                    fasta, dna_seq_file)
            os.remove(os.path.join(self.out_all, "tmp_cds.gff"))
        else:
            log.write("Predicting subcellular localization for all CDSs for "
                      "{0}.\n".format(prefix))
            log.write("Running helper.py to extract sequences for CDSs.\n")
            self.helper.get_cds_seq(os.path.join(self.gff_path, gff),
                                    fasta, dna_seq_file)
        log.write("\t" + dna_seq_file + " is generated.\n")
        print("Transfering DNA sequences to protein sequence of {0}".format(
            prefix))
        log.write("Running helper.py to translate DNA sequences to Protein "
                  "sequences.\n")
        tmp_file = os.path.join(args_sub.out_folder, "tmp")
        self.helper.translation(dna_seq_file, tmp_file)
        prot_seq_file = os.path.join(
#.........这里部分代码省略.........
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:103,代码来源:sublocal.py

示例10: RATT

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class RATT(object):
    '''annotation transfer'''

    def __init__(self, args_ratt):
        '''Create the helper objects and the reference, log and
        temporary paths used by the annotation transfer.'''
        self.multiparser = Multiparser()
        self.converter = Converter()
        self.format_fixer = FormatFixer()
        self.helper = Helper()
        ref_gbk = args_ratt.ref_gbk
        if ref_gbk:
            self.gbk = os.path.join(ref_gbk, "gbk_tmp")
            self.gbk_tmp = os.path.join(self.gbk, "tmp")
            self.embl = os.path.join(ref_gbk, "embls")
        # an explicit embl folder takes precedence over the one
        # derived from the genbank folder
        if args_ratt.ref_embls:
            self.embl = args_ratt.ref_embls
        self.ratt_log = os.path.join(args_ratt.output_path, "ratt_log.txt")
        gff_out = args_ratt.gff_outfolder
        self.tmp_files = {"tar": os.path.join(args_ratt.tar_fastas, "tmp"),
                          "ref": os.path.join(args_ratt.ref_fastas, "tmp"),
                          "out_gff": os.path.join(gff_out, "tmp")}
        for ext in ("gff", "ptt", "rnt"):
            self.tmp_files[ext] = os.path.join(gff_out, "tmp." + ext)

    def _convert_to_pttrnt(self, gffs, files, log):
        for gff in files:
            if gff.endswith(".gff"):
                gff = os.path.join(gffs, gff)
                filename = gff.split("/")
                prefix = filename[-1][:-4]
                rnt = gff[:-3] + "rnt"
                ptt = gff[:-3] + "ptt"
                fasta = self.helper.get_correct_file(self.tmp_files["tar"],
                                                     ".fa", prefix, None, None)
                if fasta:
                    self.converter.convert_gff2rntptt(gff, fasta, ptt, rnt,
                                                      None, None)
                    log.write("\t" + ptt + " is generated.\n")
                    log.write("\t" + rnt + " is generated.\n")

    def _remove_files(self, args_ratt, out_gbk, log):
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".gff", "file")
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".ptt", "file")
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".rnt", "file")
        log.write("Moving the final output files to {0}.\n".format(args_ratt.gff_outfolder))
        self.helper.move_all_content(self.tmp_files["out_gff"],
                                     args_ratt.gff_outfolder, None)
        log.write("Remove the temperary files.\n")
        shutil.rmtree(self.tmp_files["out_gff"])
        shutil.rmtree(self.tmp_files["tar"])
        shutil.rmtree(self.tmp_files["ref"])
        self.helper.remove_tmp_dir(args_ratt.tar_fastas)
        self.helper.remove_tmp_dir(args_ratt.ref_fastas)
        self.helper.remove_tmp_dir(args_ratt.ref_embls)
        self.helper.remove_tmp_dir(args_ratt.ref_gbk)

    def _convert_to_gff(self, ratt_result, args_ratt, files, log):
        name = ratt_result.split(".")
        filename = ".".join(name[1:-2]) + ".gff"
        output_file = os.path.join(args_ratt.output_path, filename)
        self.converter.convert_embl2gff(
             os.path.join(args_ratt.output_path, ratt_result), output_file)
        self.format_fixer.fix_ratt(output_file, ".".join(name[1:-2]),
                                   "tmp_gff")
        shutil.move("tmp_gff", output_file)
        shutil.copy(output_file, os.path.join(args_ratt.gff_outfolder,
                                              filename))
        log.write("\t" + os.path.join(args_ratt.gff_outfolder, filename) + 
                  " is generated.\n")
        files.append(filename)

    def _parser_embl_gbk(self, files):
        self.helper.check_make_folder(self.gbk)
        for file_ in files:
            close = False
            with open(file_, "r") as f_h:
                for line in f_h:
                    if (line.startswith("LOCUS")):
                        out = open(self.gbk_tmp, "w")
                        datas = line.split(" ")
                        for data in datas:
                            if (len(data) != 0) and (data != "LOCUS"):
                                filename = ".".join([data.strip(), "gbk"])
                                break
                    elif (line.startswith("VERSION")):
                        datas = line.split(" ")
                        for data in datas:
                            if (len(data) != 0) and (data != "VERSION"):
                                new_filename = ".".join([data.strip(), "gbk"])
                                break
                        if new_filename.find(filename):
                            filename = new_filename
                    if out:
                        out.write(line)
                    if line.startswith("//"):
                        out.close()
                        close = True
                        shutil.move(self.gbk_tmp,
#.........这里部分代码省略.........
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:103,代码来源:ratt.py

示例11: TSSpredator

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]

#.........这里部分代码省略.........
            tmp_tss = os.path.join(self.tmps["tmp"], "_".join([
                          prefix, args_tss.program + ".gff"]))
            pre_tss = os.path.join(self.gff_outfolder, "_".join([
                          prefix, args_tss.program + ".gff"]))
            check_orphan(pre_tss, os.path.join(
                args_tss.gffs, prefix + ".gff"),
                "tmp/merge_forward.wig", "tmp/merge_reverse.wig", tmp_tss)
            shutil.move(tmp_tss, pre_tss)
        shutil.rmtree("tmp")

    def _remove_files(self, args_tss):
        print("Remove temperary files and folders")
        self.helper.remove_tmp_dir(args_tss.fastas)
        self.helper.remove_tmp_dir(args_tss.gffs)
        self.helper.remove_tmp_dir(args_tss.ta_files)
        if "merge_forward.wig" in os.listdir(os.getcwd()):
            os.remove("merge_forward.wig")
        if "merge_reverse.wig" in os.listdir(os.getcwd()):
            os.remove("merge_reverse.wig")
        shutil.rmtree(args_tss.wig_folder)
        if args_tss.manual is not None:
            shutil.rmtree(args_tss.manual)

    def _deal_with_overlap(self, out_folder, args_tss):
        '''deal with the situation that TSS and 
        processing site at the same position'''
        if not args_tss.overlap_feature:
            pass
        else:
            print("Comparing TSSs and Processing sites")
            if args_tss.program.lower() == "tss":
                for tss in os.listdir(out_folder):
                    if tss.endswith("_TSS.gff"):
                        ref = self.helper.get_correct_file(
                                args_tss.overlap_gffs, "_processing.gff",
                                tss.replace("_TSS.gff", ""), None, None)
                        filter_tss_pro(os.path.join(out_folder, tss),
                                       ref, args_tss.program,
                                       args_tss.cluster)
            elif args_tss.program.lower() == "processing":
                for tss in os.listdir(out_folder):
                    if tss.endswith("_processing.gff"):
                        ref = self.helper.get_correct_file(
                                args_tss.overlap_gffs, "_TSS.gff",
                                tss.replace("_processing.gff", ""), None, None)
                        filter_tss_pro(os.path.join(out_folder, tss),
                                       ref, args_tss.program,
                                       args_tss.cluster)

    def _low_expression(self, args_tss, gff_folder):
        '''deal with the low expressed TSS'''
        prefix = None
        self._merge_wigs(args_tss.wig_folder, "wig", args_tss.libs)
        for gff in os.listdir(gff_folder):
            if (args_tss.program.lower() == "tss") and (
                    gff.endswith("_TSS.gff")):
                prefix = gff.replace("_TSS.gff", "")
            elif (args_tss.program.lower() == "processing") and (
                    gff.endswith("_processing.gff")):
                prefix = gff.replace("_processing.gff", "")
            if prefix:
                out = open(os.path.join(
                    self.stat_outfolder, prefix, "_".join([
                        "stat", prefix, "low_expression_cutoff.csv"])), "w")
                out.write("\t".join(["Genome", "Cutoff_coverage"]) + "\n")
                cutoff = filter_low_expression(
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:70,代码来源:tsspredator.py

示例12: UTRDetection

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class UTRDetection(object):
    '''detection of 5'UTRs and 3'UTRs from TSS, transcript and
    (optionally) terminator gff files'''

    def __init__(self, args_utr):
        '''Create the helper objects and the input/output paths.'''
        self.helper = Helper()
        self.multiparser = Multiparser()
        self.tss_path = os.path.join(args_utr.tsss, "tmp")
        self.tran_path = os.path.join(args_utr.trans, "tmp")
        self.utr5_path = os.path.join(args_utr.out_folder, "5UTR")
        self.utr3_path = os.path.join(args_utr.out_folder, "3UTR")
        self.utr5_stat_path = os.path.join(self.utr5_path, "statistics")
        self.utr3_stat_path = os.path.join(self.utr3_path, "statistics")

    def _check_folder(self, folder):
        '''Stop the program when a required folder was not given.'''
        if folder is None:
            print("Error: lack required files!!!")
            sys.exit()

    def _check_gff(self, folder):
        '''Run helper.check_uni_attributes on every ".gff" file of
        folder.'''
        for gff in os.listdir(folder):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(folder, gff))

    def _compute_utr(self, args_utr):
        '''Detect the 5'UTRs and 3'UTRs for every ".gff" file of
        args_utr.gffs and move the length plots written to the working
        directory into the statistics folders.'''
        for gff in os.listdir(args_utr.gffs):
            if gff.endswith(".gff"):
                prefix = gff[:-4]
                tss = self.helper.get_correct_file(
                        self.tss_path, "_TSS.gff", prefix, None, None)
                tran = self.helper.get_correct_file(
                        self.tran_path, "_transcript.gff", prefix, None, None)
                # terminators are optional
                if args_utr.terms:
                    term = self.helper.get_correct_file(
                                os.path.join(args_utr.terms, "tmp"),
                                "_term.gff", prefix, None, None)
                else:
                    term = None
                print("computing 5'UTR of {0} .....".format(prefix))
                detect_5utr(tss, os.path.join(args_utr.gffs, gff),
                            tran, os.path.join(self.utr5_path, "gffs",
                            "_".join([prefix, "5UTR.gff"])), args_utr)
                print("computing 3'UTR of {0} .....".format(prefix))
                detect_3utr(tran, os.path.join(args_utr.gffs, gff),
                            term, os.path.join(self.utr3_path, "gffs",
                            "_".join([prefix, "3UTR.gff"])), args_utr)
                self.helper.move_all_content(
                    os.getcwd(), self.utr5_stat_path, ["_5utr_length.png"])
                self.helper.move_all_content(
                    os.getcwd(), self.utr3_stat_path, ["_3utr_length.png"])

    def run_utr_detection(self, args_utr):
        '''Check and parse the input folders, compute the UTRs and
        remove the temporary folders afterwards.

        args_utr.tsss, args_utr.gffs and args_utr.trans are required
        (the program exits when one of them is None); args_utr.terms
        is optional.
        '''
        self._check_folder(args_utr.tsss)
        self._check_folder(args_utr.gffs)
        self._check_folder(args_utr.trans)
        self._check_gff(args_utr.tsss)
        self._check_gff(args_utr.gffs)
        self._check_gff(args_utr.trans)
        # os.listdir(None) would scan the working directory, so the
        # optional terminator folder is only checked when it is given
        if args_utr.terms:
            self._check_gff(args_utr.terms)
        self.multiparser.parser_gff(args_utr.gffs, None)
        self.multiparser.parser_gff(args_utr.tsss, "TSS")
        self.multiparser.combine_gff(args_utr.gffs, self.tss_path, None, "TSS")
        self.multiparser.parser_gff(args_utr.trans, "transcript")
        self.multiparser.combine_gff(args_utr.gffs, self.tran_path,
                                     None, "transcript")
        if args_utr.terms:
            self.multiparser.parser_gff(args_utr.terms, "term")
            self.multiparser.combine_gff(args_utr.gffs,
                                         os.path.join(args_utr.terms, "tmp"),
                                         None, "term")
        self._compute_utr(args_utr)
        self.helper.remove_tmp(args_utr.gffs)
        self.helper.remove_tmp(args_utr.tsss)
        self.helper.remove_tmp(args_utr.trans)
        if args_utr.terms:
            self.helper.remove_tmp(args_utr.terms)
        self.helper.remove_tmp(self.utr5_path)
        self.helper.remove_tmp(self.utr3_path)
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:77,代码来源:utr.py

示例13: TSSpredator

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]

#.........这里部分代码省略.........

    def _check_orphan(self, prefixs, wig_folder, args_tss):
        '''Run check_orphan on the result gff file of every prefix and
        replace that file by the output; the "tmp" folder of the
        working directory is deleted at the end.'''
        for prefix in prefixs:
            self._merge_wigs(wig_folder, prefix, args_tss.libs)
            gff_name = "_".join([prefix, args_tss.program + ".gff"])
            tmp_tss = os.path.join(self.tmps["tmp"], gff_name)
            pre_tss = os.path.join(self.gff_outfolder, gff_name)
            genome_gff = os.path.join(args_tss.gffs, prefix + ".gff")
            check_orphan(pre_tss, genome_gff,
                         "tmp/merge_forward.wig", "tmp/merge_reverse.wig",
                         tmp_tss)
            # the checked file replaces the previous result
            shutil.move(tmp_tss, pre_tss)
        shutil.rmtree("tmp")

    def _remove_files(self, args_tss):
        print("Remove temperary files and folders...")
        self.helper.remove_tmp(args_tss.fastas)
        self.helper.remove_tmp(args_tss.gffs)
        self.helper.remove_tmp(args_tss.wig_folder)
        self.helper.remove_tmp(args_tss.ta_files)
        if "merge_forward.wig" in os.listdir(os.getcwd()):
            os.remove("merge_forward.wig")
        if "merge_reverse.wig" in os.listdir(os.getcwd()):
            os.remove("merge_reverse.wig")

    def _deal_with_overlap(self, out_folder, args_tss):
        if args_tss.overlap_feature.lower() == "both":
            pass
        else:
            print("Comparing TSS and Processing site...")
            if args_tss.program.lower() == "tss":
                for tss in os.listdir(out_folder):
                    if tss.endswith("_TSS.gff"):
                        ref = self.helper.get_correct_file(
                                args_tss.references, "_processing.gff",
                                tss.replace("_TSS.gff", ""), None, None)
                        filter_tss_pro(os.path.join(out_folder, tss),
                                       ref, args_tss.overlap_feature,
                                       args_tss.cluster)
            elif args_tss.program.lower() == "processing_site":
                for tss in os.listdir(out_folder):
                    if tss.endswith("_processing.gff"):
                        ref = self.helper.get_correct_file(
                                args_tss.references, "_TSS.gff",
                                tss.replace("_processing.gff", ""), None, None)
                        filter_tss_pro(os.path.join(out_folder, tss),
                                       ref, args_tss.overlap_feature,
                                       args_tss.cluster)

    def _low_expression(self, args_tss, gff_folder):
        prefix = None
        self._merge_wigs(args_tss.wig_folder, "wig", args_tss.libs)
        for gff in os.listdir(gff_folder):
            if (args_tss.program.lower() == "tss") and (
                    gff.endswith("_TSS.gff")):
                prefix = gff.replace("_TSS.gff", "")
            elif (args_tss.program.lower() == "processing") and (
                    gff.endswith("_processing.gff")):
                prefix = gff.replace("_processing.gff", "")
            if prefix:
                out = open(os.path.join(
                    self.stat_outfolder, prefix, "_".join([
                        "stat", prefix, "low_expression_cutoff.csv"])), "w")
                out.write("\t".join(["strain", "cutoff_coverage"]) + "\n")
                cutoff = filter_low_expression(
                        os.path.join(gff_folder, gff), args_tss,
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:70,代码来源:tsspredator.py


注:本文中的annogesiclib.helper.Helper.get_correct_file方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。