当前位置: 首页>>代码示例>>Python>>正文


Python Helper.sort_gff方法代码示例

本文整理汇总了Python中annogesiclib.helper.Helper.sort_gff方法的典型用法代码示例。如果您正苦于以下问题:Python Helper.sort_gff方法的具体用法?Python Helper.sort_gff怎么用?Python Helper.sort_gff使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在annogesiclib.helper.Helper的用法示例。


在下文中一共展示了Helper.sort_gff方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: TestHelper

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]
class TestHelper(unittest.TestCase):
    """Exercise the file and sequence utilities of ``Helper`` on fixtures."""

    def setUp(self):
        """Create a scratch folder holding one GFF and one FASTA fixture."""
        self.example = ExampleData()
        self.helper = Helper()
        self.gff_out = self.example.gff_out
        # Newlines are stripped so the value can be compared directly with
        # the string returned by extract_gene.
        self.rev_seq = self.example.rev_seq.replace("\n", "")
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)
        self.gff_file = os.path.join(self.test_folder, "test.gff")
        with open(self.gff_file, "w") as gff_handle:
            gff_handle.write(self.example.gff_file)
        self.seq_file = os.path.join(self.test_folder, "test.fa")
        with open(self.seq_file, "w") as seq_handle:
            seq_handle.write(self.example.seq)

    def tearDown(self):
        """Delete the scratch folder together with everything inside it."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def test_remove_all_content(self):
        """"file" mode removes tmp files only; "dir" mode removes tmp dirs."""
        folder = self.test_folder
        tmp_file = os.path.join(folder, "tmp1.gff")
        tmp_dir = os.path.join(folder, "tmp2")
        shutil.copyfile(self.gff_file, tmp_file)
        os.mkdir(tmp_dir)

        self.helper.remove_all_content(folder, "tmp", "file")
        self.assertFalse(os.path.exists(tmp_file))
        self.assertTrue(os.path.exists(tmp_dir))

        self.helper.remove_all_content(folder, "tmp", "dir")
        self.assertFalse(os.path.exists(tmp_dir))
        # The fixture that does not carry the "tmp" prefix must survive.
        self.assertTrue(os.path.exists(self.gff_file))

    def test_remove_tmp(self):
        """remove_tmp deletes both the "tmp" and the "*_folder" directory."""
        doomed = [os.path.join(self.test_folder, name)
                  for name in ("tmp", "test.gff_folder")]
        for path in doomed:
            os.mkdir(path)
        self.helper.remove_tmp(self.test_folder)
        for path in doomed:
            self.assertFalse(os.path.exists(path))

    def test_get_correct_file(self):
        """get_correct_file returns the GFF path for the "test" prefix."""
        gff_file = os.path.join(self.test_folder, "test.gff")
        wig_names = ("test_forward.wig_STRAIN_aaa.wig",
                     "test_reverse.wig_STRAIN_aaa.wig")
        # The wig files only need to exist; their content is irrelevant here.
        for wig_name in wig_names:
            shutil.copyfile(gff_file,
                            os.path.join(self.test_folder, wig_name))
        libs = ["test_forward.wig_STRAIN_aaa.wig:frag:1:a:+",
                "test_reverse.wig_STRAIN_aaa.wig:frag:1:a:-"]
        filename = self.helper.get_correct_file(
            self.test_folder, ".gff", "test", None, libs)
        self.assertEqual(filename, gff_file)

    def test_sorf_gff(self):
        """sort_gff output holds the same lines as the expected GFF."""
        out_file = os.path.join(self.test_folder, "test.out")
        self.helper.sort_gff(self.gff_file, out_file)
        produced = import_data(out_file)
        expected = self.gff_out.split("\n")
        # Compared as sets: only the line content is checked, not the order.
        self.assertEqual(set(produced), set(expected))

    def test_extract_gene(self):
        """extract_gene returns the requested region for either strand."""
        seq = self.example.seq.replace("\n", "")
        forward = self.helper.extract_gene(seq, 1, 70, "+")
        self.assertEqual(
            forward,
            "CGCAGGTTGAGTTCCTGTTCCCGATAGATCCGATAAACCCGCTTATGATTCCAGAGCTGTCCCTGCACAT")
        reverse = self.helper.extract_gene(seq, 1, 140, "-")
        self.assertEqual(reverse, self.rev_seq)

    def test_get_seq(self):
        """get_seq writes a FASTA record for the single annotated feature."""
        gff_file = os.path.join(self.test_folder, "test.gff")
        out_file = os.path.join(self.test_folder, "test.cds")
        # Overwrite the fixture with only the second line of the expected GFF.
        single_entry = self.example.gff_out.split("\n")[1]
        with open(gff_file, "w") as gff_handle:
            gff_handle.write(single_entry)
        self.helper.get_seq(self.gff_file, self.seq_file, out_file)
        produced = import_data(out_file)
        self.assertEqual(set(produced), {">cds0|aaa|1|10|+", "CGCAGGTTGA"})
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:81,代码来源:test_helper.py

示例2: Terminator

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]

#.........这里部分代码省略.........
            for wig in os.listdir(args_term.tex_wigs):
                if os.path.isdir(os.path.join(args_term.tex_wigs, wig)):
                    pass
                else:
                    shutil.copy(os.path.join(args_term.tex_wigs, wig),
                                merge_wigs)
            for wig in os.listdir(args_term.frag_wigs):
                if os.path.isdir(os.path.join(args_term.frag_wigs, wig)):
                    pass
                else:
                    shutil.copy(os.path.join(args_term.frag_wigs, wig),
                                merge_wigs)
        elif (args_term.tex_wigs is not None):
            merge_wigs = args_term.tex_wigs
        elif (args_term.frag_wigs is not None):
            merge_wigs = args_term.frag_wigs
        else:
            print("Error: no proper wig files!!!")
            sys.exit()
        return merge_wigs

    def _merge_sRNA(self, sRNAs, prefixs, gff_path):
        if sRNAs is not None:
            self.multiparser.parser_gff(sRNAs, "sRNA")
            self.helper.check_make_folder(self.tmps["merge"])
            for prefix in prefixs:
                tmp_gff = os.path.join(self.tmps["merge"], self.tmps["gff"])
                if self.tmps["gff"] in os.listdir(self.tmps["merge"]):
                    os.remove(tmp_gff)
                self.helper.merge_file(os.path.join(gff_path, prefix + ".gff"),
                                       tmp_gff)
                self.helper.merge_file(os.path.join(
                    self.srna_path, "_".join([prefix, "sRNA.gff"])), tmp_gff)
                self.helper.sort_gff(tmp_gff, os.path.join(
                    self.tmps["merge"], prefix + ".gff"))
                os.remove(tmp_gff)
            merge_path = self.tmps["merge"]
        else:
            merge_path = gff_path
        return merge_path

    def _move_file(self, term_outfolder, csv_outfolder):
        for gff in os.listdir(term_outfolder):
            if gff.endswith("_term.gff"):
                self.helper.sort_gff(os.path.join(term_outfolder, gff),
                                     self.tmps["gff"])
                shutil.move(self.tmps["gff"],
                            os.path.join(term_outfolder, gff))
                prefix = gff.replace("_term.gff", "")
                new_gff = os.path.join(self.terms["all"], "_".join([
                        prefix, self.suffixs["allgff"]]))
                csv_file = os.path.join(
                        os.path.join(self.csvs["all"], "_".join([
                            prefix, self.suffixs["csv"]])))
                out = open(new_gff, "w")
                out.write("##gff-version 3\n")
                out.close()
                self.helper.merge_file(
                        os.path.join(term_outfolder, gff),
                        os.path.join(
                            self.terms["all"], "_".join([
                                prefix, self.suffixs["allgff"]])))
                os.remove(os.path.join(term_outfolder, gff))
                pre_strain = ""
                if ("_".join([prefix, self.suffixs["csv"]]) in
                        os.listdir(self.csvs["all"])):
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:70,代码来源:terminator.py

示例3: TranscriptDetection

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]
class TranscriptDetection(object):
    '''doing for transcript detection'''

    def __init__(self, args_tran):
        """Create the collaborators and derive all working paths.

        Only ``args_tran.out_folder`` is read here; every path is built
        below it (or below its ``gffs`` sub-folder).
        """
        self.multiparser = Multiparser()
        self.helper = Helper()
        self.converter = Converter()
        out_root = args_tran.out_folder
        gff_dir = os.path.join(out_root, "gffs")
        self.gff_outfolder = gff_dir
        self.tran_path = os.path.join(gff_dir, "tmp")
        self.stat_path = os.path.join(out_root, "statistics")
        # Names and paths of the scratch files used by the comparison steps.
        self.tmps = {
            "gff": "tmp.gff",
            "merge": "tmp_merge",
            "tran": os.path.join(out_root, "tmp_tran"),
            "tss_ta": os.path.join(gff_dir, "tmp_tss_ta"),
            "ta_tss": os.path.join(gff_dir, "tmp_ta_tss"),
            "ta_gff": os.path.join(gff_dir, "tmp_ta_gff"),
            "gff_ta": os.path.join(gff_dir, "tmp_gff_ta"),
            "uni": os.path.join(gff_dir, "tmp_uni"),
            "overlap": os.path.join(gff_dir, "tmp_overlap"),
        }
        # File-name suffixes of the generated transcript GFF files.
        self.frag = "transcript_fragment.gff"
        self.tex = "transcript_tex_notex.gff"
        self.endfix_tran = "transcript.gff"

    def _compute_transcript(self, wig_f, wig_r, wig_folder, wig_type, strain,
                            libs, args_tran):
        """Run ``detect_transcript`` for one strain and one wiggle type.

        The output location handed to ``detect_transcript`` is
        ``<args_tran.out_folder>/<strain>_<wig_type>``.
        """
        print("Computing transcripts for {0}".format(strain))
        out_name = strain + "_" + wig_type
        out_location = os.path.join(args_tran.out_folder, out_name)
        detect_transcript(wig_f, wig_r, wig_folder, libs, out_location,
                          wig_type, args_tran)

    def _compute(self, wig_type, wigs, libs, args_tran):
        strains = []
        wig_folder = os.path.join(wigs, "tmp")
        for wig in os.listdir(wig_folder):
            if wig.endswith("_forward.wig"):
                strains.append(wig.replace("_forward.wig", ""))
        for strain in strains:
            f_file = os.path.join(wig_folder, "_".join(
                [strain, "forward.wig"]))
            r_file = os.path.join(wig_folder, "_".join(
                [strain, "reverse.wig"]))
            self._compute_transcript(f_file, r_file, wigs, wig_type,
                                     strain, libs, args_tran)
        return strains

    def _compare_tss(self, tas, args_tran, log):
        """Compare each transcript GFF with the TSS GFF of the same strain.

        Args:
            tas: strain names; each maps to the transcript file
                ``<gff_outfolder>/<ta>_<endfix_tran>``.
            args_tran: argument object; ``compare_tss`` (folder of TSS GFF
                files) and ``fuzzy`` are read here.
            log: writable object that receives progress lines.

        For every matching transcript/TSS pair, ``stat_ta_tss`` is run and
        both original files are replaced by sorted versions of the two
        temporary files named in ``self.tmps``.
        """
        # Split the TSS GFF files per sequence into <compare_tss>/tmp.
        self.multiparser.parser_gff(args_tran.compare_tss, "TSS")
        self.multiparser.combine_gff(
                self.gff_outfolder,
                os.path.join(args_tran.compare_tss, "tmp"),
                "transcript", "TSS")
        print("Comaring of transcripts and TSSs")
        log.write("Running stat_TA_comparison.py to compare transcripts "
                  "with TSSs.\n")
        tss_folder = os.path.join(args_tran.compare_tss, "tmp")
        for ta in tas:
            ta_file = os.path.join(self.gff_outfolder,
                                   "_".join([ta, self.endfix_tran]))
            stat_tss_out = os.path.join(
                    self.stat_path, "".join([
                        "stat_compare_transcript_TSS_",
                        ta, ".csv"]))
            for tss in os.listdir(tss_folder):
                # The part of the file name before "_TSS" must equal the
                # strain name for the file to be paired with this transcript.
                filename = tss.split("_TSS")
                if (filename[0] == ta) and (tss.endswith(".gff")):
                    # NOTE(review): presumably stat_ta_tss writes its updated
                    # transcript and TSS annotations to the two tmp paths
                    # passed here - confirm against its implementation.
                    stat_ta_tss(ta_file, os.path.join(tss_folder, tss),
                                stat_tss_out, self.tmps["ta_tss"],
                                self.tmps["tss_ta"], args_tran.fuzzy)
                    # Drop the inputs, then recreate them from the sorted
                    # temporary files. The TSS result is written to
                    # <compare_tss>/<tss>, not back into the tmp folder.
                    os.remove(ta_file)
                    os.remove(os.path.join(tss_folder, tss))
                    self.helper.sort_gff(self.tmps["ta_tss"], ta_file)
                    self.helper.sort_gff(
                            self.tmps["tss_ta"], os.path.join(
                                args_tran.compare_tss, tss))
                    os.remove(self.tmps["tss_ta"])
                    os.remove(self.tmps["ta_tss"])
            # Logged once per strain, whether or not a TSS file matched.
            log.write("\t" + stat_tss_out + "\n")

    def _compare_cds(self, tas, args_tran, log):
        self.multiparser.parser_gff(args_tran.gffs, None)
        self.multiparser.combine_gff(
            self.gff_outfolder, os.path.join(args_tran.gffs, "tmp"),
            "transcript", None)
        print("Comaring of transcripts and genome annotations")
        cds_folder = os.path.join(args_tran.gffs, "tmp")
        log.write("Running stat_TA_comparison.py to compare transcripts "
                  "with genome annotations.\n")
        for ta in tas:
            ta_file = os.path.join(self.gff_outfolder,
                                   "_".join([ta, self.endfix_tran]))
            stat_gff_out = os.path.join(self.stat_path, "".join([
                "stat_compare_transcript_genome_", ta, ".csv"]))
            for gff in os.listdir(cds_folder):
                if (gff[:-4] == ta) and (gff.endswith(".gff")):
                    cds_file = os.path.join(cds_folder, gff)
                    stat_ta_gff(ta_file, cds_file, stat_gff_out,
                                self.tmps["ta_gff"], self.tmps["gff_ta"],
                                args_tran.c_feature)
                    os.remove(ta_file)
                    os.remove(os.path.join(args_tran.gffs, gff))
                    self.helper.sort_gff(self.tmps["ta_gff"], ta_file)
#.........这里部分代码省略.........
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:103,代码来源:transcript.py

示例4: sRNADetection

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]
class sRNADetection(object):

    def __init__(self, args_srna):
        """Create the collaborators and derive all working paths.

        Reads ``out_folder``, ``tss_folder``, ``pro_folder``, ``sorf_file``,
        ``fastas``, ``trans`` and ``terms`` from ``args_srna``. Optional
        input folders are mapped to their ``tmp`` sub-folder, or to None
        when the option was not given (see ``_check_folder_exist``).
        """
        self.args_container = ArgsContainer()
        self.helper = Helper()
        self.multiparser = Multiparser()
        out_folder = args_srna.out_folder
        # Output folders. Each is assigned exactly once; the original
        # assigned table_output and stat_path twice with the same value.
        self.gff_output = os.path.join(out_folder, "gffs")
        self.table_output = os.path.join(out_folder, "tables")
        self.stat_path = os.path.join(out_folder, "statistics")
        # Input folders.
        self.tss_path = self._check_folder_exist(args_srna.tss_folder)
        self.pro_path = self._check_folder_exist(args_srna.pro_folder)
        self.sorf_path = self._check_folder_exist(args_srna.sorf_file)
        self.fasta_path = os.path.join(args_srna.fastas, "tmp")
        self.tran_path = os.path.join(args_srna.trans, "tmp")
        self.term_path = self._check_folder_exist(args_srna.terms)
        self.merge_wigs = os.path.join(out_folder, "merge_wigs")
        # Path prefixes of the intermediate files, all below out_folder.
        self.prefixs = {
            "merge": os.path.join(out_folder, "tmp_merge"),
            "utr": os.path.join(out_folder, "tmp_utrsrna"),
            "normal": os.path.join(out_folder, "tmp_normal"),
            "in_cds": os.path.join(out_folder, "tmp_incds"),
            "merge_table": os.path.join(out_folder, "tmp_merge_table"),
            "utr_table": os.path.join(out_folder, "tmp_utrsrna_table"),
            "normal_table": os.path.join(out_folder, "tmp_normal_table"),
            "in_cds_table": os.path.join(out_folder, "tmp_incds_table"),
            "basic": os.path.join(out_folder, "tmp_basic"),
            "energy": os.path.join(out_folder, "tmp_energy"),
        }
        self.tmps = {"nr": os.path.join(out_folder, "tmp_nr"),
                     "srna": os.path.join(out_folder, "tmp_sRNA")}
        self.best_table = os.path.join(self.table_output, "best")
        # Final result folders for all candidates and for the best ones.
        self.all_best = {
            "all_gff": os.path.join(self.gff_output, "all_candidates"),
            "best_gff": os.path.join(self.gff_output, "best"),
            "all_table": os.path.join(self.table_output, "all_candidates"),
            "best_table": os.path.join(self.table_output, "best"),
        }

    def _check_folder_exist(self, folder):
        if folder is not None:
            path = os.path.join(folder, "tmp")
        else:
            path = None
        return path

    def _check_gff(self, gffs):
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _run_format(self, blast_path, database, type_, db_file, err):
        call([os.path.join(blast_path, "makeblastdb"), "-in", database,
              "-dbtype", type_, "-out", db_file], stderr=err)

    def _formatdb(self, database, type_, out_folder,
                  blast_path, database_type):
        err = open(os.path.join(out_folder, "log.txt"), "w")
        if (database.endswith(".fa")) or (
                database.endswith(".fna")) or (
                database.endswith(".fasta")):
            pass
        else:
            folders = database.split("/")
            filename = folders[-1]
            folder = "/".join(folders[:-1])
            for fasta in os.listdir(folder):
                if (fasta.endswith(".fa")) or (
                        fasta.endswith(".fna")) or (
                        fasta.endswith(".fasta")):
                    if ".".join(fasta.split(".")[:-1]) == filename:
                        database = os.path.join(folder, fasta)
        if database_type == "sRNA":
            change_format(database, "tmp_srna_database")
            os.remove(database)
            shutil.move("tmp_srna_database", database)
        db_file = ".".join(database.split(".")[:-1])
        self._run_format(blast_path, database, type_, db_file, err)
        err.close()

    def _merge_frag_tex_file(self, files, args_srna):
        if (args_srna.frag_wigs is not None) and (
                args_srna.tex_wigs is not None):
            self.helper.merge_file(files["frag_gff"], files["tex_gff"])
            self.helper.merge_file(files["frag_csv"], files["tex_csv"])
            shutil.move(files["tex_csv"], files["merge_csv"])
            self.helper.sort_gff(files["tex_gff"], files["merge_gff"])
            os.remove(files["frag_csv"])
            os.remove(files["frag_gff"])
            os.remove(files["tex_gff"])
        elif (args_srna.frag_wigs is not None):
#.........这里部分代码省略.........
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:103,代码来源:srna.py

示例5: Multiparser

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]

#.........这里部分代码省略.........
                                    first = False
                                else:
                                    out.close()
                                    out_t.close()
                                out = open(os.path.join(
                                           out_path, name + ".fa"), "w")
                                out_t = open(os.path.join(
                                             par_tmp, name + ".fa"), "w")
                                out.write(">" + name + "\n")
                                out_t.write(">" + name + "\n")
                            else:
                                out.write(line)
                                out_t.write(line)
        out.close()
        out_t.close()

    def parser_gff(self, gff_folder, feature):
        """Split every GFF file in ``gff_folder`` into one file per sequence.

        Each file whose name contains ``.gff`` (ignoring ``tmp`` and
        ``*_folder`` entries) is sorted into ``<gff_folder>/tmp.gff`` and
        then split by the first column. Every sequence ID is written twice:
        to ``<gff_folder>/<filename>_folder/<id><suffix>.gff`` and to
        ``<gff_folder>/tmp/<id><suffix>.gff``, where ``<suffix>`` is
        ``"_" + feature`` or empty when ``feature`` is None.

        Comment rows (first field starting with ``#``) and blank rows are
        skipped. A folder without any GFF data no longer raises on the
        final close.
        """
        par_tmp = os.path.join(gff_folder, "tmp")
        tmp_gff = os.path.join(gff_folder, "tmp.gff")
        out = None
        out_t = None
        feature = "" if feature is None else "_" + feature
        self.helper.check_make_folder(par_tmp)
        for filename in os.listdir(gff_folder):
            if ("_folder" in filename) or (filename == "tmp"):
                continue
            if ".gff" not in filename:
                continue
            # None never equals a parsed ID, so the first data row of each
            # input file always starts a fresh pair of output files.
            pre_seq_id = None
            out_path = os.path.join(gff_folder, filename + "_folder")
            print("Parser " + filename + "...")
            self.helper.check_make_folder(out_path)
            self.helper.sort_gff(os.path.join(gff_folder, filename), tmp_gff)
            with open(tmp_gff, "r") as f_h:
                for row in csv.reader(f_h, delimiter="\t"):
                    if (not row) or row[0].startswith("#"):
                        continue
                    if pre_seq_id != row[0]:
                        # New sequence ID: close the previous pair (if any)
                        # before opening the next one.
                        if out is not None:
                            out.close()
                            out_t.close()
                        out_name = row[0] + feature + ".gff"
                        out = open(os.path.join(out_path, out_name), "w")
                        out_t = open(os.path.join(par_tmp, out_name), "w")
                        pre_seq_id = row[0]
                    line = "\t".join(row) + "\n"
                    out.write(line)
                    out_t.write(line)
        if os.path.exists(tmp_gff):
            os.remove(tmp_gff)
        # Both handles stay None when no data row was ever seen.
        if out is not None:
            out.close()
            out_t.close()

    def parser_wig(self, wig_folder):
        par_tmp = os.path.join(wig_folder, "tmp")
        first = True
        out = None
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:70,代码来源:multiparser.py

示例6: Terminator

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]

#.........这里部分代码省略.........
                if os.path.isdir(os.path.join(args_term.tex_wigs, wig)):
                    pass
                else:
                    shutil.copy(os.path.join(args_term.tex_wigs, wig),
                                merge_wigs)
            for wig in os.listdir(args_term.frag_wigs):
                if os.path.isdir(os.path.join(args_term.frag_wigs, wig)):
                    pass
                else:
                    shutil.copy(os.path.join(args_term.frag_wigs, wig),
                                merge_wigs)
        elif (args_term.tex_wigs is not None):
            merge_wigs = args_term.tex_wigs
        elif (args_term.frag_wigs is not None):
            merge_wigs = args_term.frag_wigs
        else:
            print("Error: Wiggle files are not assigned!")
            sys.exit()
        return merge_wigs

    def _merge_sRNA(self, sRNAs, prefixs, gff_path):
        '''searching the terminator with sRNA information'''
        if sRNAs is not None:
            self.multiparser.parser_gff(sRNAs, "sRNA")
            self.helper.check_make_folder(self.tmps["merge"])
            for prefix in prefixs:
                tmp_gff = os.path.join(self.tmps["merge"], self.tmps["gff"])
                if self.tmps["gff"] in os.listdir(self.tmps["merge"]):
                    os.remove(tmp_gff)
                self.helper.merge_file(os.path.join(gff_path, prefix + ".gff"),
                                       tmp_gff)
                self.helper.merge_file(os.path.join(
                    self.srna_path, "_".join([prefix, "sRNA.gff"])), tmp_gff)
                self.helper.sort_gff(tmp_gff, os.path.join(
                    self.tmps["merge"], prefix + ".gff"))
                os.remove(tmp_gff)
            merge_path = self.tmps["merge"]
        else:
            merge_path = gff_path
        return merge_path

    def _move_file(self, term_outfolder, csv_outfolder):
        for gff in os.listdir(term_outfolder):
            if gff.endswith("_term.gff"):
                self.helper.sort_gff(os.path.join(term_outfolder, gff),
                                     self.tmps["gff"])
                shutil.move(self.tmps["gff"],
                            os.path.join(term_outfolder, gff))
                prefix = gff.replace("_term.gff", "")
                new_gff = os.path.join(self.terms["all"], "_".join([
                        prefix, self.suffixs["allgff"]]))
                csv_file = os.path.join(
                        os.path.join(self.csvs["all"], "_".join([
                            prefix, self.suffixs["csv"]])))
                out = open(new_gff, "w")
                out.write("##gff-version 3\n")
                out.close()
                self.helper.merge_file(
                        os.path.join(term_outfolder, gff),
                        os.path.join(
                            self.terms["all"], "_".join([
                                prefix, self.suffixs["allgff"]])))
                os.remove(os.path.join(term_outfolder, gff))
                pre_strain = ""
                if ("_".join([prefix, self.suffixs["csv"]]) in
                        os.listdir(self.csvs["all"])):
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:70,代码来源:terminator.py

示例7: TSSpredator

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]

#.........这里部分代码省略.........
                compare_file = os.path.join(self.gff_outfolder,
                                            "_".join([tss, "processing.gff"]))
            validate_gff(compare_file, os.path.join(args_tss.gffs, gff),
                         stat_file, out_cds_file, args_tss.utr_length,
                         args_tss.program.lower())
            log.write("\t" + stat_file + " is generated.\n")
            shutil.move(out_cds_file, os.path.join(args_tss.gffs, gff))

    def _compare_ta(self, tsss, args_tss, log):
        '''compare TSS with transcript'''
        detect = False
        log.write("Running stat_TA_comparison to compare transcripts "
                  "and TSSs/PSs.\n")
        print("Comparing transcripts and TSSs")
        self.multiparser.parser_gff(args_tss.ta_files, "transcript")
        self.multiparser.combine_gff(args_tss.gffs, self.tmps["ta"],
                                     None, "transcript")
        for tss in tsss:
            stat_out = os.path.join(
                    self.stat_outfolder, tss, "".join([
                        "stat_compare_TSS_transcript_",
                        tss, ".csv"]))
            for ta in os.listdir(self.tmps["ta"]):
                filename = ta.split("_transcript")
                if (filename[0] == tss) and (filename[1] == ".gff"):
                    detect = True
                    break
            compare_file = os.path.join(self.gff_outfolder,
                                        "_".join([tss, "TSS.gff"]))
            if detect:
                stat_ta_tss(os.path.join(self.tmps["ta"], ta), compare_file,
                            stat_out, self.tmps["ta_tss"],
                            self.tmps["tss_ta"], args_tss.fuzzy)
                self.helper.sort_gff(self.tmps["tss_ta"], compare_file)
                self.helper.sort_gff(self.tmps["ta_tss"],
                                     os.path.join(args_tss.ta_files, ta))
                os.remove(self.tmps["tss_ta"])
                os.remove(self.tmps["ta_tss"])
                detect = False
            log.write("\t" + stat_out + " is generated.\n")

    def _stat_tss(self, tsss, feature, log):
        print("Running statistaics")
        for tss in tsss:
            compare_file = os.path.join(self.gff_outfolder,
                                        "_".join([tss, feature]) + ".gff")
            stat_tsspredator(
                compare_file, feature,
                os.path.join(self.stat_outfolder, tss, "_".join([
                    "stat", feature, "class", tss]) + ".csv"),
                os.path.join(self.stat_outfolder, tss, "_".join([
                    "stat", feature, "libs", tss]) + ".csv"))
            self.helper.move_all_content(os.getcwd(), os.path.join(
                self.stat_outfolder, tss), ["_class", ".png"])
            if os.path.exists(os.path.join(
                    self.stat_outfolder, "TSSstatistics.tsv")):
                shutil.move(
                    os.path.join(
                        self.stat_outfolder, "TSSstatistics.tsv"),
                    os.path.join(
                        self.stat_outfolder, tss, "TSSstatistics.tsv"))
            plot_venn(compare_file, feature)
            self.helper.move_all_content(os.getcwd(), os.path.join(
                self.stat_outfolder, tss), ["_venn", ".png"])
            log.write("The following files in {0} are generated:\n".format(
                (os.path.join(self.stat_outfolder, tss))))
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:70,代码来源:tsspredator.py

示例8: TSSpredator

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]

#.........这里部分代码省略.........
            if args_tss.program.lower() == "tss":
                compare_file = os.path.join(self.gff_outfolder,
                                            "_".join([tss, "TSS.gff"]))
            elif args_tss.program.lower() == "processing":
                compare_file = os.path.join(self.gff_outfolder,
                                            "_".join([tss, "processing.gff"]))
            validate_gff(compare_file, os.path.join(args_tss.gffs, gff),
                         stat_file, out_cds_file, args_tss.utr_length,
                         args_tss.program.lower())
            shutil.move(out_cds_file, os.path.join(args_tss.gffs, gff))

    def _compare_ta(self, tsss, args_tss):
        detect = False
        print("Running compare transcript assembly and TSS ...")
        self.multiparser.parser_gff(args_tss.ta_files, "transcript")
        self.multiparser.combine_gff(args_tss.gffs, self.tmps["ta"],
                                     None, "transcript")
        for tss in tsss:
            stat_out = os.path.join(
                    self.stat_outfolder, tss, "".join([
                        "stat_compare_TSS_Transcriptome_assembly_",
                        tss, ".csv"]))
            for ta in os.listdir(self.tmps["ta"]):
                filename = ta.split("_transcript")
                if (filename[0] == tss) and (filename[1] == ".gff"):
                    detect = True
                    break
            compare_file = os.path.join(self.gff_outfolder,
                                        "_".join([tss, "TSS.gff"]))
            if detect:
                stat_ta_tss(os.path.join(self.tmps["ta"], ta), compare_file,
                            stat_out, self.tmps["ta_tss"],
                            self.tmps["tss_ta"], args_tss.fuzzy)
                self.helper.sort_gff(self.tmps["tss_ta"], compare_file)
                self.helper.sort_gff(self.tmps["ta_tss"],
                                     os.path.join(args_tss.ta_files, ta))
                os.remove(self.tmps["tss_ta"])
                os.remove(self.tmps["ta_tss"])
                detect = False

    def _stat_tss(self, tsss, feature):
        print("Running statistaics.....")
        for tss in tsss:
            compare_file = os.path.join(self.gff_outfolder,
                                        "_".join([tss, feature]) + ".gff")
            stat_tsspredator(
                compare_file, feature,
                os.path.join(self.stat_outfolder, tss, "_".join([
                    "stat", feature, "class", tss]) + ".csv"),
                os.path.join(self.stat_outfolder, tss, "_".join([
                    "stat", feature, "libs", tss]) + ".csv"))
            self.helper.move_all_content(os.getcwd(), os.path.join(
                self.stat_outfolder, tss), ["_class", ".png"])
            if os.path.exists(os.path.join(
                    self.stat_outfolder, "TSSstatistics.tsv")):
                shutil.move(
                    os.path.join(
                        self.stat_outfolder, "TSSstatistics.tsv"),
                    os.path.join(
                        self.stat_outfolder, tss, "TSSstatistics.tsv"))
            plot_venn(compare_file, feature)
            self.helper.move_all_content(os.getcwd(), os.path.join(
                self.stat_outfolder, tss), ["_venn", ".png"])

    def _set_gen_config(self, args_tss, input_folder):
        prefixs = []
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:70,代码来源:tsspredator.py

示例9: TranscriptAssembly

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]
class TranscriptAssembly(object):

    def __init__(self, args_tran):
        """Create helper objects and derive the working paths.

        Args:
            args_tran: argument container; only ``out_folder`` is read here.
        """
        self.multiparser = Multiparser()
        self.helper = Helper()
        self.converter = Converter()
        out_root = args_tran.out_folder
        self.gff_outfolder = os.path.join(out_root, "gffs")
        self.tran_path = os.path.join(self.gff_outfolder, "tmp")
        self.stat_path = os.path.join(out_root, "statistics")
        # Temporary file locations. The first two are bare relative names,
        # "tran" sits in the output root, the rest inside the gff folder.
        self.tmps = {"gff": "tmp.gff", "merge": "tmp_merge",
                     "tran": os.path.join(out_root, "tmp_tran")}
        for key in ("tss_ta", "ta_tss", "ta_gff", "gff_ta", "uni",
                    "overlap"):
            self.tmps[key] = os.path.join(self.gff_outfolder, "tmp_" + key)
        # File-name endings used for transcript gff files.
        self.frag = "transcript_assembly_fragment.gff"
        self.tex = "transcript_assembly_tex_notex.gff"
        self.endfix_tran = "transcript.gff"

    def _compute_transcript(self, wig_f, wig_r, wig_folder, wig_type, strain,
                            libs, args_tran):
        """Announce and run ``assembly`` for a single strain.

        The output path handed to ``assembly`` is
        ``<args_tran.out_folder>/<strain>_<wig_type>``. All other arguments
        are forwarded unchanged.
        """
        print("Computing transcript assembly for {0}...".format(strain))
        out_prefix = os.path.join(
            args_tran.out_folder, "_".join((strain, wig_type)))
        assembly(wig_f, wig_r, wig_folder, libs, out_prefix, wig_type,
                 args_tran)

    def _compute(self, wig_type, wigs, libs, args_tran):
        strains = []
        wig_folder = os.path.join(wigs, "tmp")
        for wig in os.listdir(wig_folder):
            if wig.endswith("_forward.wig"):
                strains.append(wig.replace("_forward.wig", ""))
        for strain in strains:
            f_file = os.path.join(wig_folder, "_".join(
                [strain, "forward.wig"]))
            r_file = os.path.join(wig_folder, "_".join(
                [strain, "reverse.wig"]))
            self._compute_transcript(f_file, r_file, wigs, wig_type,
                                     strain, libs, args_tran)
        return strains

    def _compare_tss(self, tas, args_tran):
        self.multiparser.parser_gff(args_tran.compare_tss, "TSS")
        self.multiparser.combine_gff(
                self.gff_outfolder,
                os.path.join(args_tran.compare_tss, "tmp"),
                "transcript", "TSS")
        print("Comaring of Transcript assembly and TSS file...")
        tss_folder = os.path.join(args_tran.compare_tss, "tmp")
        for ta in tas:
            ta_file = os.path.join(self.gff_outfolder,
                                   "_".join([ta, self.endfix_tran]))
            stat_tss_out = os.path.join(
                    self.stat_path, "".join([
                        "stat_compare_Transcriptome_assembly_TSS_",
                        ta, ".csv"]))
            for tss in os.listdir(tss_folder):
                filename = tss.split("_TSS")
                if (filename[0] == ta) and (tss.endswith(".gff")):
                    stat_ta_tss(ta_file, os.path.join(tss_folder, tss),
                                stat_tss_out, self.tmps["ta_tss"],
                                self.tmps["tss_ta"], args_tran.fuzzy)
                    os.remove(ta_file)
                    os.remove(os.path.join(tss_folder, tss))
                    self.helper.sort_gff(self.tmps["ta_tss"], ta_file)
                    self.helper.sort_gff(
                            self.tmps["tss_ta"], os.path.join(
                                args_tran.compare_tss, tss))
                    os.remove(self.tmps["tss_ta"])
                    os.remove(self.tmps["ta_tss"])

    def _compare_cds(self, tas, args_tran):
        self.multiparser.parser_gff(args_tran.compare_cds, None)
        self.multiparser.combine_gff(
            self.gff_outfolder, os.path.join(args_tran.compare_cds, "tmp"),
            "transcript", None)
        print("Comaring of Transcript assembly and gene...")
        cds_folder = os.path.join(args_tran.compare_cds, "tmp")
        for ta in tas:
            ta_file = os.path.join(self.gff_outfolder,
                                   "_".join([ta, self.endfix_tran]))
            stat_gff_out = os.path.join(self.stat_path, "".join([
                "stat_compare_Transcriptome_assembly_gene_", ta, ".csv"]))
            for gff in os.listdir(cds_folder):
                if (gff[:-4] == ta) and (gff.endswith(".gff")):
                    cds_file = os.path.join(cds_folder, gff)
                    stat_ta_gff(ta_file, cds_file, stat_gff_out,
                                self.tmps["ta_gff"], self.tmps["gff_ta"],
                                args_tran.c_feature)
                    os.remove(ta_file)
                    os.remove(os.path.join(args_tran.compare_cds, gff))
                    self.helper.sort_gff(self.tmps["ta_gff"], ta_file)
                    self.helper.sort_gff(self.tmps["gff_ta"], os.path.join(
                        args_tran.compare_cds, gff))
                    os.remove(self.tmps["ta_gff"])
                    os.remove(self.tmps["gff_ta"])

    def _compare_tss_cds(self, tas, args_tran):
#.........这里部分代码省略.........
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:103,代码来源:transcript.py


注:本文中的annogesiclib.helper.Helper.sort_gff方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。