当前位置: 首页>>代码示例>>Python>>正文


Python Helper.check_uni_attributes方法代码示例

本文整理汇总了Python中annogesiclib.helper.Helper.check_uni_attributes方法的典型用法代码示例。如果您正苦于以下问题:Python Helper.check_uni_attributes方法的具体用法?Python Helper.check_uni_attributes怎么用?Python Helper.check_uni_attributes使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在annogesiclib.helper.Helper的用法示例。


在下文中一共展示了Helper.check_uni_attributes方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: GoTermFinding

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_uni_attributes [as 别名]

#.........这里部分代码省略.........
                             "Protein_id", "Go_term"]) + "\n")
        for row in csv.reader(fh, delimiter='\t'):
            if row[0] != "Genome":
                out.write("\t".join(row) + "\n")
        out.close()
        fh.close()
        shutil.move(out_all + "_tmp", out_all)

    def _merge_files(self, gffs, out_path, out_folder, log):
        '''Merge per-replicon uniprot CSVs into one folder per input genome.

        For every "*.gff_folder" under *gffs* the matching per-file results in
        *out_path* are merged (header de-duplicated) into one
        all-strains CSV, then the merged folders are moved back into
        *out_path*. Every generated file is reported through *log*.
        '''
        log.write("Merging the output files based on the input genome "
                  "information.\n")
        merged_dirs = []
        for entry in os.listdir(gffs):
            if not entry.endswith("gff_folder"):
                continue
            genome = entry.replace(".gff_folder", "")
            dest_dir = os.path.join(out_folder, genome)
            self.helper.check_make_folder(dest_dir)
            merged_dirs.append(dest_dir)
            names = [g.replace(".gff", "")
                     for g in os.listdir(os.path.join(gffs, entry))
                     if g.endswith(".gff")]
            out_all = os.path.join(dest_dir, self.all_strain)
            if len(names) > 1:
                # Several replicons: concatenate their CSVs into one file.
                if self.all_strain in os.listdir(dest_dir):
                    os.remove(out_all)
                for name in names:
                    csv_file = "_".join([name, "uniprot.csv"])
                    src = os.path.join(out_path, name, csv_file)
                    self.helper.merge_file(src, out_all)
                    self._remove_header(out_all)
                    shutil.copy(src, dest_dir)
            else:
                # Single replicon: the merged file is just a copy.
                shutil.copyfile(
                    os.path.join(out_path, names[0],
                                 "_".join([names[0], "uniprot.csv"])),
                    out_all)
        self.helper.remove_all_content(out_path, None, "dir")
        self.helper.remove_all_content(out_path, None, "file")
        for dest_dir in merged_dirs:
            genome = dest_dir.split("/")[-1]
            shutil.move(dest_dir, os.path.join(out_path, genome))
            for file_ in os.listdir(os.path.join(out_path, genome)):
                log.write("\t" + os.path.join(out_path, genome, file_) +
                          " is generated.\n")

    def _stat(self, out_path, stat_path, go, goslim, out_folder, log):
        """Map GO terms to GOslim per genome, write statistics and collect figures."""
        log.write("Running gene_ontology.py to Retrieve GOslim terms and "
                  "do statistics.\n")
        log.write("The following files are generated:\n")
        for folder in os.listdir(out_path):
            strain_stat_path = os.path.join(stat_path, folder)
            self.helper.check_make_folder(strain_stat_path)
            fig_path = os.path.join(strain_stat_path, "figs")
            # Bug fix: the directory created below is named "figs", but the
            # original guard tested for "fig", so a pre-existing "figs" folder
            # made os.mkdir raise FileExistsError.
            if "figs" not in os.listdir(strain_stat_path):
                os.mkdir(fig_path)
            stat_file = os.path.join(strain_stat_path,
                                    "_".join(["stat", folder + ".csv"]))
            map2goslim(goslim, go,
                       os.path.join(out_path, folder, self.all_strain),
                       stat_file, out_folder)
            log.write("\t" + stat_file + "\n")
            # Move the four plot types (presumably produced by map2goslim in
            # out_folder — confirm against map2goslim) into the figs folder.
            for suffix in ("_three_roots.png", "_molecular_function.png",
                           "_cellular_component.png", "_biological_process.png"):
                self.helper.move_all_content(out_folder, fig_path, [suffix])
            for file_ in os.listdir(fig_path):
                log.write("\t" + os.path.join(fig_path, file_) + "\n")

    def run_go_term(self, args_go, log):
        """Entry point: validate inputs, retrieve GO terms for all CDSs and,
        when transcripts are given, for expressed CDSs only; then clean up."""
        annotation_files = [f for f in os.listdir(args_go.gffs)
                            if f.endswith(".gff")]
        for gff in annotation_files:
            self.helper.check_uni_attributes(os.path.join(
                                             args_go.gffs, gff))
        self.multiparser.parser_gff(args_go.gffs, None)
        has_trans = args_go.trans is not None
        if has_trans:
            self.multiparser.parser_gff(args_go.trans, "transcript")
        print("Computing all CDSs")
        log.write("Retrieving GO terms for all CDSs.\n")
        self._retrieve_go(args_go.uniprot, self.result_all_path, "all", log)
        self._merge_files(args_go.gffs, self.result_all_path, self.out_all, log)
        self._stat(self.result_all_path, self.stat_all_path, args_go.go,
                   args_go.goslim, self.out_all, log)
        if has_trans:
            log.write("Retrieving GO terms only for expressed CDSs.\n")
            print("Computing express CDSs")
            self._retrieve_go(args_go.uniprot, self.result_express_path,
                              "express", log)
            self._merge_files(args_go.gffs, self.result_express_path,
                              self.out_express, log)
            self._stat(self.result_express_path, self.stat_express_path,
                       args_go.go, args_go.goslim, self.out_express, log)
        self.helper.remove_tmp_dir(args_go.gffs)
        if has_trans:
            self.helper.remove_tmp_dir(args_go.trans)
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:104,代码来源:goterm.py

示例2: sRNATargetPrediction

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_uni_attributes [as 别名]
class sRNATargetPrediction(object):
    '''detection of sRNA-target interaction'''

    def __init__(self, args_tar):
        """Create parser/helper objects and the output and tmp path layout
        used by the sRNA target prediction pipeline."""
        self.multiparser = Multiparser()
        self.helper = Helper()
        self.fixer = FormatFixer()
        self.gff_parser = Gff3Parser()
        out = args_tar.out_folder
        self.target_seq_path = os.path.join(out, "target_seqs")
        self.srna_seq_path = os.path.join(out, "sRNA_seqs")
        self.rnaplex_path = os.path.join(out, "RNAplex_results")
        self.rnaup_path = os.path.join(out, "RNAup_results")
        self.intarna_path = os.path.join(out, "IntaRNA_results")
        self.merge_path = os.path.join(out, "merged_results")
        self.srna_path = os.path.join(args_tar.srnas, "tmp")
        self.fasta_path = os.path.join(args_tar.fastas, "tmp")
        self.gff_path = os.path.join(args_tar.gffs, "tmp")
        # Names/patterns of the temporary files this pipeline creates.
        self.tmps = {
            "tmp": "tmp_srna_target",
            "rnaup": "tmp_rnaup",
            "log": "tmp_log",
            "all_fa": "tmp*.fa",
            "all_txt": "tmp*.txt",
        }

    def _check_gff(self, gffs):
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _run_rnaplfold(self, rnaplfold_path, file_type, win_size, span,
                       unstr_region, seq_path, prefix, out_path, log):
        current = os.getcwd()
        os.chdir(out_path)
        command = " ".join([rnaplfold_path,
                            "-W", str(win_size),
                            "-L", str(span),
                            "-u", str(unstr_region),
                            "-O"])
        if file_type == "sRNA":
            log.write("<".join([command, os.path.join(current, seq_path,
                                "_".join([self.tmps["tmp"], prefix,
                                          file_type + ".fa"]))]) + "\n")
            os.system("<".join([command, os.path.join(current, seq_path,
                                "_".join([self.tmps["tmp"], prefix,
                                          file_type + ".fa"]))]))
        else:
            log.write("<".join([command, os.path.join(current, seq_path,
                                "_".join([prefix, file_type + ".fa"]))]) + "\n")
            os.system("<".join([command, os.path.join(current, seq_path,
                                "_".join([prefix, file_type + ".fa"]))]))
        os.chdir(current)

    def _wait_process(self, processes):
        for p in processes:
            p.wait()
            if p.stdout:
                p.stdout.close()
            if p.stdin:
                p.stdin.close()
            if p.stderr:
                p.stderr.close()
            try:
                p.kill()
            except OSError:
                pass
            time.sleep(5)

    def _sort_srna_fasta(self, fasta, prefix, path):
        out = open(os.path.join(path,
                   "_".join([self.tmps["tmp"], prefix, "sRNA.fa"])), "w")
        srnas = []
        with open(fasta) as f_h:
            for line in f_h:
                line = line.strip()
                if line.startswith(">"):
                    name = line[1:]
                else:
                    srnas.append({"name": name, "seq": line, "len": len(line)})
        srnas = sorted(srnas, key=lambda x: (x["len"]))
        for srna in srnas:
            out.write(">" + srna["name"].split("|")[0] + "\n")
            out.write(srna["seq"] + "\n")
        out.close()

    def _read_fasta(self, fasta_file):
        seq = ""
        with open(fasta_file, "r") as seq_f:
            for line in seq_f:
                line = line.strip()
                if line.startswith(">"):
                    continue
                else:
                    seq = seq + line
        return seq

    def _get_specific_seq(self, srna_file, seq_file, srna_out, querys):
        for query in querys:
            srna_datas = query.split(":")
            srna = {"seq_id": srna_datas[0], "strand": srna_datas[3],
                    "start": int(srna_datas[1]), "end": int(srna_datas[2])}
            gff_f = open(srna_file, "r")
            out = open(srna_out, "a")
            seq = self._read_fasta(seq_file)
#.........这里部分代码省略.........
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:103,代码来源:srna_target.py

示例3: OperonDetection

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_uni_attributes [as 别名]
class OperonDetection(object):
    '''Detection of operons: combine transcripts, TSSs, annotations and
    terminators per genome and write operon tables, gffs and statistics.'''

    def __init__(self, args_op):
        """Store parser/helper objects and derive tmp paths from the inputs."""
        self.multiparser = Multiparser()
        self.helper = Helper()
        # TSS and terminator inputs are optional; their tmp paths stay None
        # when the corresponding folder was not provided.
        if args_op.tsss is not None:
            self.tss_path = os.path.join(args_op.tsss, "tmp")
        else:
            self.tss_path = None
        self.tran_path = os.path.join(args_op.trans, "tmp")
        self.table_path = os.path.join(args_op.output_folder, "tables")
        if args_op.terms is not None:
            self._check_gff(args_op.terms, "term")
            self.term_path = os.path.join(args_op.terms, "tmp")
        else:
            self.term_path = None

    def _check_gff(self, gffs, type_):
        """Validate attribute uniqueness of every .gff file in *gffs*."""
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _detect_operon(self, prefixs, args_op, log):
        """Detect operons for each genome prefix; writes one .gff and one .csv
        per genome and logs the generated files."""
        log.write("Running detect_operon.py to detect operon.\n")
        # Typo fix: original message read "The the following files".
        log.write("The following files are generated:\n")
        for prefix in prefixs:
            out_gff = os.path.join(args_op.output_folder, "gffs",
                                    "_".join([prefix, "operon.gff"]))
            out_table = os.path.join(self.table_path,
                                     "_".join([prefix, "operon.csv"]))
            print("Detecting operons of {0}".format(prefix))
            # tss/term become False (not None) when the optional input is
            # absent — preserved, since operon() receives that value directly.
            if self.tss_path is None:
                tss = False
            else:
                tss = self.helper.get_correct_file(
                        self.tss_path, "_TSS.gff", prefix, None, None)
            tran = self.helper.get_correct_file(
                    self.tran_path, "_transcript.gff", prefix, None, None)
            gff = self.helper.get_correct_file(
                    args_op.gffs, ".gff", prefix, None, None)
            if self.term_path is None:
                term = False
            else:
                term = self.helper.get_correct_file(
                        self.term_path, "_term.gff", prefix, None, None)
            operon(tran, tss, gff, term, args_op.tss_fuzzy,
                   args_op.term_fuzzy, args_op.length, out_table, out_gff)
            log.write("\t" + out_table + "\n")
            log.write("\t" + out_gff + "\n")

    def _check_and_parser_gff(self, args_op):
        """Validate all annotation inputs, split them per genome and align the
        optional TSS/terminator files with the genome annotations."""
        self._check_gff(args_op.gffs, "gff")
        self._check_gff(args_op.trans, "tran")
        self.multiparser.parser_gff(args_op.gffs, None)
        self.multiparser.parser_gff(args_op.trans, "transcript")
        self.multiparser.combine_gff(args_op.gffs, self.tran_path,
                                     None, "transcript")
        if args_op.tsss is not None:
            self._check_gff(args_op.tsss, "tss")
            self.multiparser.parser_gff(args_op.tsss, "TSS")
            self.multiparser.combine_gff(args_op.gffs, self.tss_path, None, "TSS")
        if args_op.terms is not None:
            self._check_gff(args_op.terms, "term")
            self.multiparser.parser_gff(args_op.terms, "term")
            self.multiparser.combine_gff(args_op.gffs, self.term_path,
                                         None, "term")

    def _stat(self, table_path, stat_folder, log):
        """Run stat_operon statistics for every generated operon table."""
        log.write("Running stat_operon.py to do statistics.\n")
        for table in os.listdir(table_path):
            if table.endswith("_operon.csv"):
                filename = "_".join(["stat", table])
                out_stat = os.path.join(stat_folder, filename)
                stat(os.path.join(table_path, table), out_stat)
                log.write("\t" + out_stat + "\n")

    def run_operon(self, args_op, log):
        """Entry point: parse inputs, detect operons, compute statistics and
        remove the temporary folders."""
        self._check_and_parser_gff(args_op)
        prefixs = []
        for gff in os.listdir(args_op.gffs):
            if gff.endswith(".gff"):
                prefixs.append(gff.replace(".gff", ""))
        self._detect_operon(prefixs, args_op, log)
        self._stat(self.table_path, args_op.stat_folder, log)
        self.helper.remove_tmp_dir(args_op.gffs)
        # Bug fix: tsss is optional (see __init__), so guard its cleanup the
        # same way terms is guarded below instead of passing a possible None.
        if args_op.tsss is not None:
            self.helper.remove_tmp_dir(args_op.tsss)
        self.helper.remove_tmp_dir(args_op.trans)
        if args_op.terms is not None:
            self.helper.remove_tmp_dir(args_op.terms)
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:93,代码来源:operon.py

示例4: OperonDetection

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_uni_attributes [as 别名]
class OperonDetection(object):

    def __init__(self, args_op):
        """Store parser/helper objects and the tmp/output path layout for the
        operon pipeline with mandatory TSS and UTR inputs."""
        self.multiparser = Multiparser()
        self.helper = Helper()
        tmp = "tmp"
        self.tss_path = os.path.join(args_op.tsss, tmp)
        self.tran_path = os.path.join(args_op.trans, tmp)
        self.utr5_path = os.path.join(args_op.utr5s, tmp)
        self.utr3_path = os.path.join(args_op.utr3s, tmp)
        self.table_path = os.path.join(args_op.output_folder, "tables")
        # Terminators are optional.
        if args_op.terms is None:
            self.term_path = None
        else:
            self._check_gff(args_op.terms, "term")
            self.term_path = os.path.join(args_op.terms, tmp)

    def _check_gff(self, gffs, type_):
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _detect_operon(self, prefixs, args_op):
        for prefix in prefixs:
            out_table = os.path.join(self.table_path,
                                     "_".join(["operon", prefix + ".csv"]))
            print("Detection operons of {0}".format(prefix))
            tss = self.helper.get_correct_file(
                    self.tss_path, "_TSS.gff", prefix, None, None)
            tran = self.helper.get_correct_file(
                    self.tran_path, "_transcript.gff", prefix, None, None)
            gff = self.helper.get_correct_file(
                    args_op.gffs, ".gff", prefix, None, None)
            if self.term_path is None:
                term = False
            else:
                term = self.helper.get_correct_file(
                        self.term_path, "_term.gff", prefix, None, None)
            operon(tran, tss, gff, term, args_op.tss_fuzzy,
                   args_op.term_fuzzy, args_op.length, out_table)

    def _check_and_parser_gff(self, args_op):
        """Validate every input folder, split the files per genome and align
        each feature type with the genome annotations (original call order
        is preserved exactly)."""
        for folder, type_ in ((args_op.tsss, "tss"),
                              (args_op.gffs, "gff"),
                              (args_op.trans, "tran"),
                              (args_op.utr5s, "utr"),
                              (args_op.utr3s, "utr")):
            self._check_gff(folder, type_)
        self.multiparser.parser_gff(args_op.gffs, None)
        for folder, feature, tmp_path in (
                (args_op.tsss, "TSS", self.tss_path),
                (args_op.trans, "transcript", self.tran_path),
                (args_op.utr5s, "5UTR", self.utr5_path),
                (args_op.utr3s, "3UTR", self.utr3_path)):
            self.multiparser.parser_gff(folder, feature)
            self.multiparser.combine_gff(args_op.gffs, tmp_path, None, feature)
        if args_op.terms is not None:
            self._check_gff(args_op.terms, "term")
            self.multiparser.parser_gff(args_op.terms, "term")
            self.multiparser.combine_gff(args_op.gffs, self.term_path,
                                         None, "term")

    def _stat(self, table_path, stat_folder):
        for table in os.listdir(table_path):
            if table.startswith("operon_") and table.endswith(".csv"):
                filename = "_".join(["stat", table])
                out_stat = os.path.join(stat_folder, filename)
                stat(os.path.join(table_path, table), out_stat)

    def _combine_gff(self, prefixs, args_op):
        for prefix in prefixs:
            out_file = os.path.join(args_op.output_folder, "gffs",
                                    "_".join([prefix, "all_features.gff"]))
            print("Combine all features of {0}".format(prefix))
            tss = self.helper.get_correct_file(
                    self.tss_path, "_TSS.gff", prefix, None, None)
            tran = self.helper.get_correct_file(
                    self.tran_path, "_transcript.gff", prefix, None, None)
            gff = self.helper.get_correct_file(
                    args_op.gffs, ".gff", prefix, None, None)
            utr5 = self.helper.get_correct_file(
                    self.utr5_path, "_5UTR.gff", prefix, None, None)
            utr3 = self.helper.get_correct_file(
                    self.utr3_path, "_3UTR.gff", prefix, None, None)
            if self.term_path is None:
                term = None
            else:
                term = self.helper.get_correct_file(
                        self.term_path, "_term.gff", prefix, None, None)
            combine_gff(gff, tran, tss, utr5, utr3, term,
                        args_op.tss_fuzzy, args_op.term_fuzzy, out_file)

    def run_operon(self, args_op):
        self._check_and_parser_gff(args_op)
        prefixs = []
        for gff in os.listdir(args_op.gffs):
            if gff.endswith(".gff"):
                prefixs.append(gff.replace(".gff", ""))
#.........这里部分代码省略.........
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:103,代码来源:operon.py

示例5: MEME

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_uni_attributes [as 别名]

#.........这里部分代码省略.........
            call(command + ["-oc", os.path.join(meme_folder, folder),
                            os.path.join(input_path, fasta)])
        if (args_pro.program.lower() == "glam2") or (
                args_pro.program.lower() == "both"):
            glam_folder = self._gen_and_check_folder(
                            out_path, folder, "GLAM2")
            log.write(" ".join([args_pro.glam2_path,
                  "-O", os.path.join(glam_folder, folder), "-a",
                  str(min_width), "-b", str(max_width), "-r",
                  str(args_pro.num_motif), "-n", str(args_pro.end_run),
                  "n", os.path.join(input_path, fasta)]) + "\n")
            call([args_pro.glam2_path,
                  "-O", os.path.join(glam_folder, folder), "-a",
                  str(min_width), "-b", str(max_width), "-r",
                  str(args_pro.num_motif), "-n", str(args_pro.end_run),
                  "n", os.path.join(input_path, fasta)])

    def _get_fasta_file(self, fasta_path, prefix):
        """Return the fasta filename in *fasta_path* whose stem matches *prefix*.

        NOTE(review): like the original, when nothing matches this returns the
        last directory entry, and raises NameError on an empty directory.
        """
        for fasta in os.listdir(fasta_path):
            matched = (
                (fasta.endswith(".fa") and
                 prefix == fasta.replace(".fa", "")) or
                (fasta.endswith(".fna") and
                 prefix == fasta.replace(".fna", "")) or
                (fasta.endswith(".fasta") and
                 prefix == fasta.replace(".fasta", "")))
            if matched:
                break
        return fasta

    def _check_gff(self, gffs):
        """Validate attribute uniqueness of each .gff file under *gffs*."""
        for file_name in os.listdir(gffs):
            if not file_name.endswith(".gff"):
                continue
            self.helper.check_uni_attributes(os.path.join(gffs, file_name))

    def _move_and_merge_fasta(self, input_path, prefix):
        all_type = os.path.join(self.tmp_folder, self.fastas["all"])
        all_no_orph = os.path.join(self.tmp_folder, self.fastas["all_no_orph"])
        if self.fastas["all"] in os.listdir(self.tmp_folder):
            os.remove(all_type)
        if self.fastas["all_no_orph"] in os.listdir(self.tmp_folder):
            os.remove(all_no_orph)
        shutil.copyfile(self.fastas["pri"], self.fastas["tmp_fa"])
        self.helper.merge_file(self.fastas["sec"], self.fastas["tmp_fa"])
        self.helper.merge_file(self.fastas["inter"], self.fastas["tmp_fa"])
        self.helper.merge_file(self.fastas["anti"], self.fastas["tmp_fa"])
        shutil.copyfile(self.fastas["tmp_fa"], self.fastas["tmp_all"])
        self.helper.merge_file(self.fastas["orph"], self.fastas["tmp_all"])
        del_repeat_fasta(self.fastas["tmp_fa"], all_no_orph)
        del_repeat_fasta(self.fastas["tmp_all"], all_type)
        os.remove(self.fastas["tmp_fa"])
        os.remove(self.fastas["tmp_all"])
        out_prefix = os.path.join(input_path, prefix)
        shutil.move(self.fastas["pri"], "_".join([
            out_prefix, "allgenome_primary.fa"]))
        shutil.move(self.fastas["sec"], "_".join([
            out_prefix, "allgenome_secondary.fa"]))
        shutil.move(self.fastas["inter"], "_".join([
            out_prefix, "allgenome_internal.fa"]))
        shutil.move(self.fastas["anti"], "_".join([
            out_prefix, "allgenome_antisense.fa"]))
        shutil.move(self.fastas["orph"], "_".join([
            out_prefix, "allgenome_orphan.fa"]))
        shutil.move(all_type, "_".join([
            out_prefix, "allgenome_all_types.fa"]))
        shutil.move(all_no_orph, "_".join([
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:70,代码来源:meme.py

示例6: GoTermFinding

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_uni_attributes [as 别名]
class GoTermFinding(object):

    def __init__(self, args_go):
        """Prepare the output folder layout (all CDSs vs expressed CDSs) for
        GO-term retrieval and statistics."""
        self.multiparser = Multiparser()
        self.helper = Helper()
        out_folder = args_go.out_folder
        self.out_all = os.path.join(out_folder, "all_CDS")
        self.out_express = os.path.join(out_folder, "expressed_CDS")
        self.result_all_path = os.path.join(self.out_all, "Go_term_results")
        self.result_express_path = os.path.join(
            self.out_express, "Go_term_results")
        self.gff_path = os.path.join(args_go.gffs, "tmp")
        # Transcript input is optional; without it only "all CDS" runs.
        self.tran_path = (os.path.join(args_go.trans, "tmp")
                          if args_go.trans is not None else None)
        self.stat_all_path = os.path.join(self.out_all, "statistics")
        self.stat_express_path = os.path.join(self.out_express, "statistics")
        self.all_strain = "all_strains_uniprot.csv"

    def _retrieve_go(self, uniprot, out_path, type_):
        prefixs = []
        for gff in os.listdir(self.gff_path):
            prefix = gff.replace(".gff", "")
            prefixs.append(prefix)
            self.helper.check_make_folder(os.path.join(out_path, prefix))
            out_file = os.path.join(out_path, prefix,
                                    "_".join([prefix, "uniprot.csv"]))
            print("extracting Go terms of {0} from UniProt...".format(prefix))
            if self.tran_path is not None:
                tran_file = os.path.join(self.tran_path,
                                         "_".join([prefix, "transcript.gff"]))
            else:
                tran_file = None
            retrieve_uniprot(uniprot, os.path.join(self.gff_path, gff),
                             out_file, tran_file, type_)

    def _merge_files(self, gffs, out_path, out_folder):
        folders = []
        for folder in os.listdir(gffs):
            if folder.endswith("gff_folder"):
                folder_prefix = folder.replace(".gff_folder", "")
                folder_path = os.path.join(out_folder, folder_prefix)
                self.helper.check_make_folder(folder_path)
                folders.append(folder_path)
                filenames = []
                for gff in os.listdir(os.path.join(gffs, folder)):
                    if gff.endswith(".gff"):
                        filenames.append(gff.replace(".gff", ""))
                out_all = os.path.join(folder_path, self.all_strain)
                if len(filenames) > 1:
                    if self.all_strain in os.listdir(folder_path):
                        os.remove(out_all)
                    for filename in filenames:
                        csv_file = "_".join([filename, "uniprot.csv"])
                        self.helper.merge_file(os.path.join(out_path,
                                               filename, csv_file), out_all)
                        shutil.copy(os.path.join(out_path, filename, csv_file),
                                    folder_path)
                else:
                    shutil.copyfile(os.path.join(out_path, filenames[0],
                                    "_".join([filenames[0], "uniprot.csv"])),
                                    out_all)
        self.helper.remove_all_content(out_path, None, "dir")
        self.helper.remove_all_content(out_path, None, "file")
        for folder in folders:
            folder_prefix = folder.split("/")[-1]
            shutil.move(folder, os.path.join(out_path, folder_prefix))

    def _stat(self, out_path, stat_path, go, goslim, out_folder):
        for folder in os.listdir(out_path):
            strain_stat_path = os.path.join(stat_path, folder)
            self.helper.check_make_folder(strain_stat_path)
            fig_path = os.path.join(strain_stat_path, "figs")
            if "fig" not in os.listdir(strain_stat_path):
                os.mkdir(fig_path)
            print("Computing statistics of {0}".format(folder))
            map2goslim(goslim, go,
                       os.path.join(out_path, folder, self.all_strain),
                       os.path.join(strain_stat_path,
                                    "_".join(["stat", folder + ".csv"])),
                       out_folder)
            self.helper.move_all_content(out_folder, fig_path,
                                         ["_three_roots.png"])
            self.helper.move_all_content(out_folder, fig_path,
                                         ["_molecular_function.png"])
            self.helper.move_all_content(out_folder, fig_path,
                                         ["_cellular_component.png"])
            self.helper.move_all_content(out_folder, fig_path,
                                         ["_biological_process.png"])

    def run_go_term(self, args_go):
        for gff in os.listdir(args_go.gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(
                                                 args_go.gffs, gff))
        self.multiparser.parser_gff(args_go.gffs, None)
        if args_go.trans is not None:
            self.multiparser.parser_gff(args_go.trans, "transcript")
        print("Computing all CDS...")
#.........这里部分代码省略.........
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:103,代码来源:goterm.py

示例7: sRNATargetPrediction

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_uni_attributes [as 别名]
class sRNATargetPrediction(object):

    def __init__(self, args_tar):
        """Set up parsers and the output/tmp directory layout for target prediction."""
        self.multiparser = Multiparser()
        self.helper = Helper()
        self.fixer = FormatFixer()
        self.gff_parser = Gff3Parser()
        out = args_tar.out_folder
        # Sub-folders of the output folder, one per result category.
        for attr, sub in (("target_seq_path", "target_seqs"),
                          ("srna_seq_path", "sRNA_seqs"),
                          ("rnaplex_path", "RNAplex"),
                          ("rnaup_path", "RNAup"),
                          ("merge_path", "merge")):
            setattr(self, attr, os.path.join(out, sub))
        # tmp folders created by the multiparser inside each input folder.
        self.srna_path = os.path.join(args_tar.srnas, "tmp")
        self.fasta_path = os.path.join(args_tar.fastas, "tmp")
        self.gff_path = os.path.join(args_tar.gffs, "tmp")
        # Names/globs of intermediate files cleaned up later.
        self.tmps = {"tmp": "tmp", "rnaup": "tmp_rnaup", "log": "tmp_log",
                     "all_fa": "tmp*.fa", "all_txt": "tmp*.txt"}

    def _check_gff(self, gffs):
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _run_rnaplfold(self, vienna_path, file_type, win_size, span,
                       unstr_region, seq_path, prefix, out_path):
        current = os.getcwd()
        os.chdir(out_path)
        command = " ".join([os.path.join(vienna_path, "RNAplfold"),
                            "-W", str(win_size),
                            "-L", str(span),
                            "-u", str(unstr_region),
                            "-O"])
        if file_type == "sRNA":
            os.system("<".join([command, os.path.join(current, seq_path,
                                "_".join([self.tmps["tmp"], prefix,
                                          file_type + ".fa"]))]))
        else:
            os.system("<".join([command, os.path.join(current, seq_path,
                                "_".join([prefix, file_type + ".fa"]))]))
        os.chdir(current)

    def _wait_process(self, processes):
        for p in processes:
            p.wait()
            if p.stdout:
                p.stdout.close()
            if p.stdin:
                p.stdin.close()
            if p.stderr:
                p.stderr.close()
            try:
                p.kill()
            except OSError:
                pass
            time.sleep(5)

    def _sort_srna_fasta(self, fasta, prefix, path):
        out = open(os.path.join(path,
                   "_".join([self.tmps["tmp"], prefix, "sRNA.fa"])), "w")
        srnas = []
        with open(fasta) as f_h:
            for line in f_h:
                line = line.strip()
                if line.startswith(">"):
                    name = line[1:]
                else:
                    srnas.append({"name": name, "seq": line, "len": len(line)})
        srnas = sorted(srnas, key=lambda x: (x["len"]))
        for srna in srnas:
            out.write(">" + srna["name"].split("|")[0] + "\n")
            out.write(srna["seq"] + "\n")
        out.close()

    def _read_fasta(self, fasta_file):
        seq = ""
        with open(fasta_file, "r") as seq_f:
            for line in seq_f:
                line = line.strip()
                if line.startswith(">"):
                    continue
                else:
                    seq = seq + line
        return seq

    def _get_specific_seq(self, srna_file, seq_file, srna_out, querys):
        for query in querys:
            srna_datas = query.split(":")
            srna = {"seq_id": srna_datas[0], "strand": srna_datas[1],
                    "start": int(srna_datas[2]), "end": int(srna_datas[3])}
            gff_f = open(srna_file, "r")
            out = open(srna_out, "a")
            seq = self._read_fasta(seq_file)
            num = 0
            for entry in self.gff_parser.entries(gff_f):
                if (entry.seq_id == srna["seq_id"]) and (
                        entry.strand == srna["strand"]) and (
                        entry.start == srna["start"]) and (
                        entry.end == srna["end"]):
                    if "ID" in entry.attributes.keys():
                        id_ = entry.attributes["ID"]
#.........这里部分代码省略.........
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:103,代码来源:srna_target.py

示例8: Ribos

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_uni_attributes [as 别名]

#.........这里部分代码省略.........
                log.write("Running ribo_gff.py to do statistics and generate "
                          "gff files for {0}.\n".format(prefix))
                log.write("The following files are generated:\n")
                out_gff = os.path.join(gff_outfolder, "_".join([
                   prefix, feature + ".gff"]))
                stat_and_covert2gff(os.path.join(
                    table_folder, "_".join([prefix, suffixs["csv"]])),
                    feature_id, out_gff,
                    args_ribo.fuzzy, out_stat, feature)
                log.write("\t" + out_gff + "\n")
                log.write("\t" + out_stat + "\n")
                fh.close()

    def _remove_tmp(self, args_ribo):
        self.helper.remove_tmp_dir(args_ribo.gffs)
        self.helper.remove_tmp_dir(args_ribo.fastas)
        self.helper.remove_tmp_dir(args_ribo.trans)
        self.helper.remove_tmp_dir(args_ribo.tsss)

    def _remove_overlap(self, gff_path, tmp_files, suffixs, type_, fuzzy, log):
        log.write("Running rbs_overlap.py to remove the overlapping "
                  "riboswitches/RNA thermometers.\n")
        for gff in os.listdir(gff_path):
            if gff.endswith(".gff"):
                tmp_table = os.path.join(os.path.join(
                        tmp_files["table"], "_".join([
                        gff.replace(".gff", ""), suffixs["csv"]])))
                rbs_overlap(tmp_table,
                    os.path.join(gff_path, gff), type_, fuzzy)
                log.write("\t" + tmp_table + " is updated.\n")

    def _core_prediction(self, args_ribo, feature_id, rfam, tmp_files,
                         table_folder, feature, scan_folder, suffixs,
                         stat_folder, gff_outfolder, out_folder, type_, log):
        '''Main detection pipeline for one feature type.

        Extracts the relevant family models from Rfam, compresses them with
        Infernal's cmpress, scans every genome, removes overlapping hits,
        merges the per-genome results and maps the Rfam details back onto
        the final tables. *feature* is e.g. "riboswitch" or
        "RNA_thermometer"; *type_* selects the overlap-removal mode.
        '''
        log.write("Running get_Rfam_ribo.py to get the information of "
                  "riboswitches/RNA thermometers from Rfam.\n")
        # Pull the requested family models out of the full Rfam database.
        rbs_from_rfam(feature_id, args_ribo.rfam, rfam)
        log.write("Using Infernal to compress the Rfam data of "
                  "riboswitches/RNA thermometers.\n")
        log.write("Please make sure the version of Infernal is at least 1.1.1.\n")
        print("Compressing Rfam of " + feature)
        log.write(" ".join([args_ribo.cmpress_path, "-F", rfam]) + "\n")
        # cmpress indexes the covariance-model file for scanning
        # (-F overwrites any existing index).
        call([args_ribo.cmpress_path, "-F", rfam])
        log.write("Done!\n")
        prefixs = []
        # Fresh tmp workspaces for the per-genome fasta/scan/table outputs.
        self.helper.check_make_folder(tmp_files["fasta"])
        self.helper.check_make_folder(tmp_files["scan"])
        self.helper.check_make_folder(tmp_files["table"])
        prefixs = self._scan_extract_rfam(
                prefixs, args_ribo, tmp_files, suffixs, feature, rfam, log)
        # Discard candidates that overlap each other within the fuzzy range.
        self._remove_overlap(self.gff_path, tmp_files, suffixs, type_,
                             args_ribo.fuzzy, log)
        self._merge_results(args_ribo, scan_folder, suffixs, tmp_files,
                            table_folder, stat_folder, feature_id,
                            gff_outfolder, feature, log)
        log.write("Running map_ribos.py to extract all the details from Rfam.\n")
        mapping_ribos(table_folder, feature_id, feature)
        log.write("The following files are updated:\n")
        for file_ in os.listdir(table_folder):
            log.write("\t" + os.path.join(table_folder, file_) + "\n")
        # Remove the remaining tmp folders below the output folder.
        self.helper.remove_all_content(out_folder, "tmp", "dir")

    def run_ribos(self, args_ribo, log_t, log_r):
        """Entry point: detect riboswitches and/or RNA thermometers.

        Parses the annotation/fasta/transcript (and optional TSS) input
        folders, then runs the core prediction once per requested feature
        type. *log_t* and *log_r* are the thermometer and riboswitch log
        handles; either may be None. Exits if --fuzzy_rbs is out of range.
        """
        # A ribosome-binding-site fuzziness above 6 nt is not supported.
        if args_ribo.fuzzy_rbs > 6:
            if log_t is not None:
                log_t.write("--fuzzy_rbs should be equal or less than 6!\n")
            if log_r is not None:
                log_r.write("--fuzzy_rbs should be equal or less than 6!\n")
            print("Error: --fuzzy_rbs should be equal or less than 6!")
            sys.exit()
        self.multiparser.parser_gff(args_ribo.gffs, None)
        self.multiparser.parser_fasta(args_ribo.fastas)
        self.multiparser.parser_gff(args_ribo.trans, "transcript")
        if args_ribo.tsss is not None:
            self.multiparser.parser_gff(args_ribo.tsss, "TSS")
        # Make sure every annotation file carries unique attribute IDs.
        for gff in os.listdir(args_ribo.gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(
                                                 args_ribo.gffs, gff))
        if (args_ribo.program.lower() == "both") or (
                args_ribo.program.lower() == "riboswitch"):
            # Fixed typo: message previously read "riboswtiches".
            print("Detecting riboswitches now")
            self._core_prediction(
                    args_ribo, args_ribo.ribos_id, self.ribos_rfam,
                    self.ribos_tmp_files, self.ribos_table_folder,
                    "riboswitch", self.ribos_scan_folder, self.ribos_suffixs,
                    self.ribos_stat_folder, self.ribos_gff_outfolder,
                    args_ribo.ribos_out_folder, "riboswitch", log_r)
        if (args_ribo.program.lower() == "both") or (
                args_ribo.program.lower() == "thermometer"):
            print("Detecting RNA thermometers now")
            self._core_prediction(
                    args_ribo, args_ribo.thermo_id, self.thermo_rfam,
                    self.thermo_tmp_files, self.thermo_table_folder,
                    "RNA_thermometer", self.thermo_scan_folder,
                    self.thermo_suffixs, self.thermo_stat_folder,
                    self.thermo_gff_outfolder, args_ribo.thermo_out_folder,
                    "thermometer", log_t)
        self._remove_tmp(args_ribo)
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:104,代码来源:ribos.py

示例9: CircRNADetection

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_uni_attributes [as 别名]

#.........这里部分代码省略.........
        for prefix in tmp_prefixs:
            self.helper.check_make_folder(os.path.join(self.gff_folder,
                                                       prefix))
            shutil.copytree(prefix, os.path.join(self.splice_path, prefix))
            self.helper.check_make_folder(os.path.join(
                                          self.candidate_path, prefix))
            print("comparing with annotation of {0}".format(prefix))
            if self.splices["all_file"] in os.listdir(os.path.join(
                                           self.splice_path, prefix)):
                detect_circrna(os.path.join(self.splice_path, prefix,
                               self.splices["all_file"]), os.path.join(
                               self.gff_path, prefix + ".gff"),
                               os.path.join(self.candidate_path, prefix,
                               "_".join(["circRNA", prefix + "_all.csv"])),
                               args_circ, os.path.join(args_circ.stat_folder,
                               "_".join(["stat_circRNA", prefix + ".csv"])))
                self.converter.convert_circ2gff(
                     os.path.join(self.candidate_path, prefix,
                                  "_".join(["circRNA",
                                            prefix + "_all.csv"])),
                     args_circ, os.path.join(
                                self.gff_folder, prefix,
                                "_".join([prefix, "circRNA_all.gff"])),
                     os.path.join(self.gff_folder, prefix,
                                  "_".join([prefix, "circRNA_best.gff"])))

    def _assign_merge_bam(self, args_circ):
        remove_frags = []
        bam_files = []
        if (args_circ.normal_bams is not None) and (
                args_circ.frag_bams is not None):
            for frag in os.listdir(args_circ.frag_bams):
                if frag.endswith(".bam"):
                    shutil.copyfile(os.path.join(args_circ.frag_bams, frag),
                                    os.path.join(args_circ.normal_bams, frag))
                    remove_frags.append(frag)
            merge_folder = args_circ.normal_bams
        elif (args_circ.normal_bams is not None):
            merge_folder = args_circ.normal_bams
        elif (args_circ.frag_bams is not None):
            merge_folder = args_circ.frag_bams
        else:
            print("Error: please assign bam folder or do alignment!!")
            sys.exit()
        for bam in os.listdir(merge_folder):
            if bam.endswith(".bam"):
                bam_files.append(os.path.join(merge_folder, bam))
        return merge_folder, remove_frags, bam_files

    def run_circrna(self, args_circ):
        """Entry point: detect circular RNAs from read alignments.

        Either aligns the reads itself (args_circ.align) or reuses the
        provided normal/fragmented bam folders, runs segemehl's testrealign
        per genome, merges the splice beds, compares them with the
        annotation and finally writes statistics plus gff output.
        """
        # Every annotation file must carry unique attribute IDs.
        for gff in os.listdir(args_circ.gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(
                                                 args_circ.gffs, gff))
        if args_circ.segemehl_path is None:
            print("Error: please assign segemehl folder!!")
            sys.exit()
        self.multiparser.parser_gff(args_circ.gffs, None)
        self.multiparser.combine_gff(args_circ.fastas, self.gff_path,
                                     "fasta", None)
        tmp_reads = []
        if args_circ.align:
            # Align the reads ourselves; zipped read files are unpacked
            # into temporary copies that must be removed afterwards.
            self.multiparser.parser_fasta(args_circ.fastas)
            tmp_reads = self._deal_zip_file(args_circ.read_folder)
            align_files, prefixs = self._align(args_circ)
        else:
            # Reuse pre-computed bam files; genome prefixes come from
            # the fasta file names.
            self.multiparser.parser_fasta(args_circ.fastas)
            prefixs = []
            for fasta in os.listdir(self.fasta_path):
                fasta_prefix = fasta.replace(".fa", "")
                prefixs.append(fasta_prefix)
            merge_folder, remove_frag, bam_files = self._assign_merge_bam(
                                                   args_circ)
            align_files = None
        for prefix in prefixs:
            if args_circ.align:
                # Self-aligned runs produce sam files that still need
                # conversion to bam before merging.
                sub_alignment_path = os.path.join(self.alignment_path, prefix)
                bam_files, convert_ones, remove_ones = self._convert_sam2bam(
                    sub_alignment_path, args_circ.samtools_path, align_files)
            else:
                sub_alignment_path = merge_folder
                convert_ones = []
                remove_ones = []
            self._merge_sort_aligment_file(
                bam_files, args_circ.samtools_path, sub_alignment_path,
                convert_ones, tmp_reads, remove_ones)
            # segemehl testrealign detects the splice junctions.
            self._run_testrealign(prefix, args_circ.segemehl_path,
                                  sub_alignment_path)
        tmp_prefixs = self._merge_bed(args_circ.fastas, self.splice_path)
        # Re-parse annotations: the earlier tmp folders were consumed above.
        self.multiparser.parser_gff(args_circ.gffs, None)
        self.multiparser.combine_gff(args_circ.fastas, self.gff_path,
                                     "fasta", None)
        self._stat_and_gen_gff(tmp_prefixs, args_circ)
        self.helper.remove_tmp(args_circ.fastas)
        self.helper.remove_tmp(args_circ.gffs)
        for tmp_prefix in tmp_prefixs:
            shutil.rmtree(tmp_prefix)
        # Delete the fragment bams that were copied into the normal folder.
        if (not args_circ.align) and (len(remove_frag) != 0):
            for frag in remove_frag:
                os.remove(os.path.join(merge_folder, frag))
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:104,代码来源:circrna.py

示例10: MEME

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_uni_attributes [as 别名]
class MEME(object):

    def __init__(self, args_pro):
        """Prepare helpers and the tmp/fasta paths used for motif detection."""
        self.multiparser = Multiparser()
        self.helper = Helper()
        self.tss_path = os.path.join(args_pro.tsss, "tmp")
        # The annotation folder is optional.
        self.gff_path = (os.path.join(args_pro.gffs, "tmp")
                         if args_pro.gffs is not None else None)
        self.out_fasta = os.path.join(args_pro.output_folder, "fasta_class")
        tmp = os.path.join(os.getcwd(), "tmp")
        self.tmp_folder = tmp
        # One fasta per TSS class, written into the tmp folder.
        class_files = {"pri": "primary.fa", "sec": "secondary.fa",
                       "inter": "internal.fa", "anti": "antisense.fa",
                       "orph": "orphan.fa"}
        self.fastas = {key: os.path.join(tmp, fname)
                       for key, fname in class_files.items()}
        self.fastas.update({"all_no_orph": "without_orphan.fa",
                            "all": "all_type.fa",
                            "tmp_fa": os.path.join(tmp, "tmp.fa"),
                            "tmp_all": os.path.join(tmp, "tmp_all.fa")})
        self.all_fasta = os.path.join(args_pro.fastas, "allfasta.fa")
        self.all_tss = os.path.join(self.tss_path, "allfasta_TSS.gff")

    def _run_normal_motif(self, input_path, out_path, filename,
                          fasta, width, args_pro):
        print(os.path.join(input_path, fasta))
        folder = "_".join(["promoter_motifs", filename,
                           str(width), "nt"])
        if folder not in os.listdir(out_path):
            call([args_pro.meme_path, "-maxsize", "1000000",
                  "-dna", "-nmotifs", str(args_pro.num_motif),
                  "-w", str(width), "-maxiter", "100",
                  "-evt", str(args_pro.e_value),
                  "-oc", os.path.join(out_path, folder),
                  os.path.join(input_path, fasta)])

    def _run_small_motif(self, input_path, out_path, filename,
                         fasta, width, args_pro):
        data = width.split("-")
        min_width = data[0]
        max_width = data[1]
        folder = "_".join(["promoter_motifs", filename,
                           "-".join([str(min_width), str(max_width)]), "nt"])
        if folder not in os.listdir(out_path):
            call([args_pro.meme_path, "-maxsize", "1000000",
                  "-dna", "-nmotifs", str(args_pro.num_motif),
                  "-minsites", "0", "-maxsites", "2",
                  "-minw", str(min_width), "-maxw", str(max_width),
                  "-maxiter", "100",
                  "-evt", str(args_pro.e_value),
                  "-oc", os.path.join(out_path, folder),
                  os.path.join(input_path, fasta)])

    def _get_fasta_file(self, fasta_path, prefix):
        for fasta in os.listdir(fasta_path):
            if (fasta.endswith(".fa")) and \
               (prefix == fasta.replace(".fa", "")):
                break
            elif (fasta.endswith(".fna")) and \
                 (prefix == fasta.replace(".fna", "")):
                break
            elif (fasta.endswith(".fasta")) and \
                 (prefix == fasta.replace(".fasta", "")):
                break
        return fasta

    def _check_gff(self, gffs):
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _move_and_merge_fasta(self, input_path, prefix):
        all_type = os.path.join(self.tmp_folder, self.fastas["all"])
        all_no_orph = os.path.join(self.tmp_folder, self.fastas["all_no_orph"])
        if self.fastas["all"] in os.listdir(self.tmp_folder):
            os.remove(all_type)
        if self.fastas["all_no_orph"] in os.listdir(self.tmp_folder):
            os.remove(all_no_orph)
        shutil.copyfile(self.fastas["pri"], self.fastas["tmp_fa"])
        self.helper.merge_file(self.fastas["sec"], self.fastas["tmp_fa"])
        self.helper.merge_file(self.fastas["inter"], self.fastas["tmp_fa"])
        self.helper.merge_file(self.fastas["anti"], self.fastas["tmp_fa"])
        shutil.copyfile(self.fastas["tmp_fa"], self.fastas["tmp_all"])
        self.helper.merge_file(self.fastas["orph"], self.fastas["tmp_all"])
        del_repeat_fasta(self.fastas["tmp_fa"], all_no_orph)
        del_repeat_fasta(self.fastas["tmp_all"], all_type)
        os.remove(self.fastas["tmp_fa"])
        os.remove(self.fastas["tmp_all"])
        out_prefix = os.path.join(input_path, prefix)
        shutil.move(self.fastas["pri"], "_".join([
            out_prefix, "allstrain_primary.fa"]))
        shutil.move(self.fastas["sec"], "_".join([
            out_prefix, "allstrain_secondary.fa"]))
        shutil.move(self.fastas["inter"], "_".join([
            out_prefix, "allstrain_internal.fa"]))
        shutil.move(self.fastas["anti"], "_".join([
            out_prefix, "allstrain_antisense.fa"]))
        shutil.move(self.fastas["orph"], "_".join([
            out_prefix, "allstrain_orphan.fa"]))
#.........这里部分代码省略.........
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:103,代码来源:meme.py

示例11: TSSpredator

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_uni_attributes [as 别名]

#.........这里部分代码省略.........
                    if tss.endswith("_processing.gff"):
                        ref = self.helper.get_correct_file(
                                args_tss.overlap_gffs, "_TSS.gff",
                                tss.replace("_processing.gff", ""), None, None)
                        filter_tss_pro(os.path.join(out_folder, tss),
                                       ref, args_tss.program,
                                       args_tss.cluster)

    def _low_expression(self, args_tss, gff_folder):
        '''deal with the low expressed TSS'''
        prefix = None
        self._merge_wigs(args_tss.wig_folder, "wig", args_tss.libs)
        for gff in os.listdir(gff_folder):
            if (args_tss.program.lower() == "tss") and (
                    gff.endswith("_TSS.gff")):
                prefix = gff.replace("_TSS.gff", "")
            elif (args_tss.program.lower() == "processing") and (
                    gff.endswith("_processing.gff")):
                prefix = gff.replace("_processing.gff", "")
            if prefix:
                out = open(os.path.join(
                    self.stat_outfolder, prefix, "_".join([
                        "stat", prefix, "low_expression_cutoff.csv"])), "w")
                out.write("\t".join(["Genome", "Cutoff_coverage"]) + "\n")
                cutoff = filter_low_expression(
                        os.path.join(gff_folder, gff), args_tss,
                        "tmp/merge_forward.wig", "tmp/merge_reverse.wig",
                        "tmp/without_low_expression.gff")
                out.write("\t".join([prefix, str(cutoff)]) + "\n")
                os.remove(os.path.join(gff_folder, gff))
                shutil.move("tmp/without_low_expression.gff",
                            os.path.join(gff_folder, gff))
                prefix = None
        out.close()

    def run_tsspredator(self, args_tss, log):
        """Entry point: predict TSS (or processing sites) with TSSpredator.

        Generates one TSSpredator config per genome, runs the predictor,
        converts the master tables to gff, optionally re-checks orphan TSSs,
        filters low-expressed sites, merges manually curated TSSs, resolves
        TSS/PS overlaps and finally computes statistics and validation.
        """
        input_folder = os.path.join(args_tss.out_folder, "configs")
        # Every annotation file must carry unique attribute IDs.
        for gff in os.listdir(args_tss.gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(
                                                 args_tss.gffs, gff))
        self.helper.check_make_folder(self.gff_outfolder)
        self.multiparser.parser_fasta(args_tss.fastas)
        self.multiparser.parser_gff(args_tss.gffs, None)
        self.multiparser.parser_wig(args_tss.wig_folder)
        # One config file (and prefix) per genome.
        prefixs = self._set_gen_config(args_tss, input_folder, log)
        for prefix in prefixs:
            out_path = os.path.join(
                    self.master, "_".join(["MasterTable", prefix]))
            config_file = os.path.join(
                    input_folder, "_".join(["config", prefix]) + ".ini")
            self._start_to_run(args_tss.tsspredator_path, config_file,
                               out_path, prefix, log)
            # TSSpredator may emit a statistics table; keep it with the
            # other statistics output.
            if os.path.exists(os.path.join(out_path, "TSSstatistics.tsv")):
                shutil.move(os.path.join(out_path, "TSSstatistics.tsv"),
                            os.path.join(
                                self.stat_outfolder, "TSSstatistics.tsv"))
        # "ps" is an alias for processing-site prediction.
        if args_tss.program.lower() == "ps":
            args_tss.program = "processing"
        self._convert_gff(prefixs, args_tss, log)
        if args_tss.check_orphan:
            print("checking the orphan TSSs")
            log.write("Running check_orphan.py to re-check orphan TSSs.\n")
            self._check_orphan(prefixs,
                               os.path.join(args_tss.wig_folder, "tmp"),
                               args_tss)
        self.multiparser.combine_gff(args_tss.gffs, self.gff_outfolder,
                                     None, args_tss.program)
        # Collect the genome names and create their statistics folders.
        datas = []
        for gff in os.listdir(self.gff_outfolder):
            if gff.endswith(".gff"):
                gff_folder = gff.replace("".join(["_", args_tss.program,
                                                  ".gff"]), "")
                self.helper.check_make_folder(
                     os.path.join(self.stat_outfolder, gff_folder))
                datas.append(gff_folder)
        if args_tss.remove_low_expression is not None:
            log.write("Running filter_low_expression.py to filter out "
                      "low expressed TSS/PS.\n")
            self._low_expression(args_tss, self.gff_outfolder)
        if args_tss.manual is not None:
            # Merge manually curated TSSs into the predicted set.
            self.multiparser.parser_gff(args_tss.manual, None)
            self.multiparser.combine_gff(args_tss.gffs, self.manual_path,
                                         None, None)
            self.multiparser.combine_fasta(args_tss.gffs, self.fasta_path,
                                         None)
            self.multiparser.combine_wig(args_tss.gffs, self.wig_path,
                                         None, args_tss.libs)
            log.write("Running merge_manual.py to merge the manual TSSs.\n")
            self._merge_manual(datas, args_tss)
        log.write("Running filter_TSS_pro.py to deal with the overlap "
                  "position between TSS and PS.\n")
        self._deal_with_overlap(self.gff_outfolder, args_tss)
        log.write("Running stat_TSSpredator.py to do statistics.\n")
        self._stat_tss(datas, args_tss.program, log)
        if args_tss.validate:
            self._validate(datas, args_tss, log)
        if args_tss.ta_files is not None:
            self._compare_ta(datas, args_tss, log)
        self._remove_files(args_tss)
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:104,代码来源:tsspredator.py

示例12: UTRDetection

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_uni_attributes [as 别名]
class UTRDetection(object):
    """Detect 5'UTRs and 3'UTRs from TSSs, transcripts and (optional)
    terminators, writing per-genome GFF files and length statistics."""

    def __init__(self, args_utr):
        self.helper = Helper()
        self.multiparser = Multiparser()
        # Per-genome files produced by the multiparser live in "tmp"
        # subfolders of the corresponding input folders.
        # NOTE(review): this assumes args_utr.tsss/trans are already set;
        # a None value would fail here before run_utr_detection can
        # report the missing input — confirm callers always validate.
        self.tss_path = os.path.join(args_utr.tsss, "tmp")
        self.tran_path = os.path.join(args_utr.trans, "tmp")
        self.utr5_path = os.path.join(args_utr.out_folder, "5UTR")
        self.utr3_path = os.path.join(args_utr.out_folder, "3UTR")
        self.utr5_stat_path = os.path.join(self.utr5_path, "statistics")
        self.utr3_stat_path = os.path.join(self.utr3_path, "statistics")

    def _check_folder(self, folder):
        """Abort the run if a required input folder was not assigned."""
        if folder is None:
            print("Error: lack required files!!!")
            sys.exit()

    def _check_gff(self, folder):
        """Validate attribute uniqueness of every GFF file in ``folder``.

        ``folder`` may be None for optional inputs (e.g. terminators);
        in that case there is nothing to check.  The previous version
        crashed with ``os.listdir(None)`` when terminators were omitted.
        """
        if folder is None:
            return
        for gff in os.listdir(folder):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(folder, gff))

    def _compute_utr(self, args_utr):
        """Run 5'UTR and 3'UTR detection for every genome GFF file."""
        for gff in os.listdir(args_utr.gffs):
            if gff.endswith(".gff"):
                prefix = gff[:-4]
                # Pick the TSS/transcript (and optional terminator) file
                # that matches this genome prefix.
                tss = self.helper.get_correct_file(
                        self.tss_path, "_TSS.gff", prefix, None, None)
                tran = self.helper.get_correct_file(
                        self.tran_path, "_transcript.gff", prefix, None, None)
                if args_utr.terms:
                    term = self.helper.get_correct_file(
                                os.path.join(args_utr.terms, "tmp"),
                                "_term.gff", prefix, None, None)
                else:
                    term = None
                print("computing 5'UTR of {0} .....".format(prefix))
                detect_5utr(tss, os.path.join(args_utr.gffs, gff),
                            tran, os.path.join(self.utr5_path, "gffs",
                            "_".join([prefix, "5UTR.gff"])), args_utr)
                print("computing 3'UTR of {0} .....".format(prefix))
                detect_3utr(tran, os.path.join(args_utr.gffs, gff),
                            term, os.path.join(self.utr3_path, "gffs",
                            "_".join([prefix, "3UTR.gff"])), args_utr)
                # The plotting helpers drop their PNGs into the CWD;
                # move them next to the statistics output.
                self.helper.move_all_content(
                    os.getcwd(), self.utr5_stat_path, ["_5utr_length.png"])
                self.helper.move_all_content(
                    os.getcwd(), self.utr3_stat_path, ["_3utr_length.png"])

    def run_utr_detection(self, args_utr):
        """Entry point: validate inputs, parse them, compute UTRs, clean up."""
        self._check_folder(args_utr.tsss)
        self._check_folder(args_utr.gffs)
        self._check_folder(args_utr.trans)
        self._check_gff(args_utr.tsss)
        self._check_gff(args_utr.gffs)
        self._check_gff(args_utr.trans)
        # Terminators are optional; _check_gff now tolerates None.
        self._check_gff(args_utr.terms)
        self.multiparser.parser_gff(args_utr.gffs, None)
        self.multiparser.parser_gff(args_utr.tsss, "TSS")
        self.multiparser.combine_gff(args_utr.gffs, self.tss_path, None, "TSS")
        self.multiparser.parser_gff(args_utr.trans, "transcript")
        self.multiparser.combine_gff(args_utr.gffs, self.tran_path,
                                     None, "transcript")
        if args_utr.terms:
            self.multiparser.parser_gff(args_utr.terms, "term")
            self.multiparser.combine_gff(args_utr.gffs,
                                         os.path.join(args_utr.terms, "tmp"),
                                         None, "term")
        self._compute_utr(args_utr)
        self.helper.remove_tmp(args_utr.gffs)
        self.helper.remove_tmp(args_utr.tsss)
        self.helper.remove_tmp(args_utr.trans)
        # Only clean the terminator folder when it was actually provided.
        if args_utr.terms is not None:
            self.helper.remove_tmp(args_utr.terms)
        self.helper.remove_tmp(self.utr5_path)
        self.helper.remove_tmp(self.utr3_path)
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:77,代码来源:utr.py

示例13: TSSpredator

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_uni_attributes [as 别名]

#.........这里部分代码省略.........
            print("Comparing TSS and Processing site...")
            if args_tss.program.lower() == "tss":
                for tss in os.listdir(out_folder):
                    if tss.endswith("_TSS.gff"):
                        ref = self.helper.get_correct_file(
                                args_tss.references, "_processing.gff",
                                tss.replace("_TSS.gff", ""), None, None)
                        filter_tss_pro(os.path.join(out_folder, tss),
                                       ref, args_tss.overlap_feature,
                                       args_tss.cluster)
            elif args_tss.program.lower() == "processing_site":
                for tss in os.listdir(out_folder):
                    if tss.endswith("_processing.gff"):
                        ref = self.helper.get_correct_file(
                                args_tss.references, "_TSS.gff",
                                tss.replace("_processing.gff", ""), None, None)
                        filter_tss_pro(os.path.join(out_folder, tss),
                                       ref, args_tss.overlap_feature,
                                       args_tss.cluster)

    def _low_expression(self, args_tss, gff_folder):
        """Filter out TSSs/PSs below an automatically derived coverage
        cutoff and record the per-genome cutoff in a statistics CSV.

        The merged wiggle tracks are generated once up front; each GFF
        in ``gff_folder`` is then rewritten in place without the
        low-expressed entries.
        """
        prefix = None
        self._merge_wigs(args_tss.wig_folder, "wig", args_tss.libs)
        for gff in os.listdir(gff_folder):
            if (args_tss.program.lower() == "tss") and (
                    gff.endswith("_TSS.gff")):
                prefix = gff.replace("_TSS.gff", "")
            elif (args_tss.program.lower() == "processing") and (
                    gff.endswith("_processing.gff")):
                prefix = gff.replace("_processing.gff", "")
            if prefix:
                stat_file = os.path.join(
                    self.stat_outfolder, prefix, "_".join([
                        "stat", prefix, "low_expression_cutoff.csv"]))
                # Open/close per genome.  The previous version opened a
                # new handle each iteration but closed only once after
                # the loop, leaking all but the last handle and raising
                # NameError when no GFF matched at all.
                with open(stat_file, "w") as out:
                    out.write("\t".join(["strain", "cutoff_coverage"]) + "\n")
                    cutoff = filter_low_expression(
                            os.path.join(gff_folder, gff), args_tss,
                            "tmp/merge_forward.wig", "tmp/merge_reverse.wig",
                            "tmp/without_low_expression.gff")
                    out.write("\t".join([prefix, str(cutoff)]) + "\n")
                # Replace the original GFF with the filtered one.
                os.remove(os.path.join(gff_folder, gff))
                shutil.move("tmp/without_low_expression.gff",
                            os.path.join(gff_folder, gff))
                prefix = None

    def run_tsspredator(self, args_tss):
        """Run the full TSSpredator pipeline: validate and parse inputs,
        run TSSpredator per genome, convert/merge its output to GFF, and
        apply the optional post-processing steps (orphan check, low
        expression filter, manual merge, statistics, validation).

        Note: the step order matters — several steps communicate through
        files under the tmp/output folders rather than return values.
        """
        input_folder = os.path.join(args_tss.out_folder, "configs")
        # Every annotation GFF must have unique attributes before parsing.
        for gff in os.listdir(args_tss.gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(
                                                 args_tss.gffs, gff))
        self.helper.check_make_folder(self.gff_outfolder)
        self.multiparser.parser_fasta(args_tss.fastas)
        self.multiparser.parser_gff(args_tss.gffs, None)
        self.multiparser.parser_wig(args_tss.wig_folder)
        # One TSSpredator config/run per genome prefix.
        prefixs = self._set_gen_config(args_tss, input_folder)
        for prefix in prefixs:
            out_path = os.path.join(
                    self.master, "_".join(["MasterTable", prefix]))
            config_file = os.path.join(
                    input_folder, "_".join(["config", prefix]) + ".ini")
            self._start_to_run(args_tss.tsspredator_path, config_file,
                               out_path, prefix)
            if os.path.exists(os.path.join(out_path, "TSSstatistics.tsv")):
                shutil.move(os.path.join(out_path, "TSSstatistics.tsv"),
                            os.path.join(
                                self.stat_outfolder, "TSSstatistics.tsv"))
        # Downstream filename suffixes use "processing", not
        # "processing_site"; normalize the program name here.
        if args_tss.program.lower() == "processing_site":
            args_tss.program = "processing"
        self._convert_gff(prefixs, args_tss)
        if args_tss.check_orphan:
            print("checking the orphan TSS...")
            self._check_orphan(prefixs,
                               os.path.join(args_tss.wig_folder, "tmp"),
                               args_tss)
        self.multiparser.combine_gff(args_tss.gffs, self.gff_outfolder,
                                     None, args_tss.program)
        # Collect genome prefixes from the merged GFFs and create one
        # statistics subfolder per genome.
        datas = []
        for gff in os.listdir(self.gff_outfolder):
            if gff.endswith(".gff"):
                gff_folder = gff.replace("".join(["_", args_tss.program,
                                                  ".gff"]), "")
                self.helper.check_make_folder(
                     os.path.join(self.stat_outfolder, gff_folder))
                datas.append(gff_folder)
        if args_tss.remove_low_expression is not None:
            self._low_expression(args_tss, self.gff_outfolder)
        if args_tss.manual is not None:
            self.multiparser.combine_wig(args_tss.gffs, self.wig_path,
                                         None, args_tss.libs)
            self._merge_manual(datas, args_tss)
        self._deal_with_overlap(self.gff_outfolder, args_tss)
        if args_tss.stat:
            self._stat_tss(datas, args_tss.program)
        if args_tss.validate:
            self._validate(datas, args_tss)
        if args_tss.ta_files is not None:
            self._compare_ta(datas, args_tss)
        self._remove_files(args_tss)
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:104,代码来源:tsspredator.py

示例14: Ribos

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_uni_attributes [as 别名]

#.........这里部分代码省略.........
                regenerate_seq(first_scan_file, first_seq,
                               first_table, sec_seq)
                print("scanning of {0}".format(prefix))
                sec_scan_file = self._run_infernal(args_ribo, sec_seq,
                                                   "re_txt", prefix)
                sec_table = os.path.join(
                        self.tmp_files["table"],
                        "_".join([prefix, self.suffixs["re_csv"]]))
                reextract_rbs(sec_scan_file, first_table, sec_table)
                shutil.move(sec_table, first_table)
                modify_table(first_table, args_ribo.output_all)
        return prefixs

    def _merge_results(self, args_ribo):
        """Merge the per-sequence riboswitch scan results into one table,
        one scan folder and one GFF per input genome file, then compute
        the per-genome statistics.

        A genome GFF may contain several sequences (seq_ids); the first
        seq_id's table is copied to start the merged table and the rest
        are appended to it.
        """
        for gff in os.listdir(args_ribo.gffs):
            if gff.endswith(".gff"):
                prefix = gff.replace(".gff", "")
                print("Merge results of {0}".format(prefix))
                pre_strain = ""
                self.helper.check_make_folder(os.path.join(
                                              self.scan_folder, prefix))
                fh = open(os.path.join(args_ribo.gffs, gff))
                for entry in self.gff_parser.entries(fh):
                    # Act once per distinct seq_id (entries of the same
                    # sequence are assumed to be consecutive).
                    if entry.seq_id != pre_strain:
                        if len(pre_strain) == 0:
                            # First sequence: the copy seeds the merged table.
                            shutil.copyfile(os.path.join(
                                self.tmp_files["table"],
                                "_".join([entry.seq_id, self.suffixs["csv"]])),
                                os.path.join(
                                    self.table_folder,
                                    "_".join([prefix, self.suffixs["csv"]])))
                        else:
                            # Later sequences: append to the merged table.
                            self.helper.merge_file(os.path.join(
                                self.tmp_files["table"],
                                "_".join([entry.seq_id, self.suffixs["csv"]])),
                                os.path.join(
                                    self.table_folder,
                                    "_".join([prefix, self.suffixs["csv"]])))
                        # Keep both the first-pass and the re-scan output.
                        shutil.copy(os.path.join(
                            self.tmp_files["scan"],
                            "_".join([entry.seq_id, self.suffixs["txt"]])),
                            os.path.join(self.scan_folder, prefix))
                        shutil.copy(os.path.join(
                            self.tmp_files["scan"],
                            "_".join([entry.seq_id, self.suffixs["re_txt"]])),
                            os.path.join(self.scan_folder, prefix))
                        pre_strain = entry.seq_id
                out_stat = os.path.join(
                        self.stat_folder,
                        "_".join(["stat", prefix, "riboswitch.txt"]))
                print("compute statistics of {0}".format(prefix))
                # Produce the riboswitch GFF and statistics from the
                # merged table.
                stat_and_covert2gff(os.path.join(
                    self.table_folder,
                    "_".join([prefix, self.suffixs["csv"]])),
                    args_ribo.ribos_id, os.path.join(
                        self.gff_outfolder,
                        "_".join([prefix, "riboswitch.gff"])),
                    args_ribo.fuzzy, out_stat)
                fh.close()

    def _remove_tmp(self, args_ribo):
        """Clean up the temporary folders produced during the run."""
        for input_dir in (args_ribo.gffs, args_ribo.fastas):
            self.helper.remove_tmp(input_dir)
        self.helper.remove_all_content(args_ribo.out_folder, "tmp", "dir")

    def _remove_overlap(self, gff_path):
        """Resolve overlapping riboswitch candidates for every genome GFF,
        using the matching temporary result table."""
        for filename in os.listdir(gff_path):
            if not filename.endswith(".gff"):
                continue
            table_file = os.path.join(
                self.tmp_files["table"],
                "_".join([filename.replace(".gff", ""),
                          self.suffixs["csv"]]))
            rbs_overlap(table_file, os.path.join(gff_path, filename))

    def run_ribos(self, args_ribo):
        """Entry point: scan all genomes against Rfam riboswitch models
        and merge/annotate the hits."""
        if args_ribo.fuzzy_rbs > 6:
            print("Error: --fuzzy_rbs should be equal or less than 6!!")
            sys.exit()
        # Split the inputs into per-genome files.
        self.multiparser.parser_gff(args_ribo.gffs, None)
        self.multiparser.parser_fasta(args_ribo.fastas)
        self.multiparser.parser_gff(args_ribo.trans, "transcript")
        self.multiparser.parser_gff(args_ribo.tsss, "TSS")
        # Annotation GFFs need unique attributes before any processing.
        for annotation in os.listdir(args_ribo.gffs):
            if annotation.endswith(".gff"):
                self.helper.check_uni_attributes(
                    os.path.join(args_ribo.gffs, annotation))
        # Extract the requested riboswitch models from Rfam and index them.
        rbs_from_rfam(args_ribo.ribos_id, args_ribo.rfam, self.ribos_rfam)
        print("compressing Rfam...")
        call([os.path.join(args_ribo.infernal_path, "cmpress"),
              "-F", self.ribos_rfam])
        for tmp_dir in (self.tmp_files["fasta"], self.tmp_files["scan"],
                        self.tmp_files["table"]):
            self.helper.check_make_folder(tmp_dir)
        prefixs = self._scan_extract_rfam([], args_ribo)
        self._remove_overlap(self.gff_path)
        self._merge_results(args_ribo)
        mapping_ribos(self.table_folder, args_ribo.ribos_id)
        self._remove_tmp(args_ribo)
开发者ID:malvikasharan,项目名称:ANNOgesic,代码行数:104,代码来源:ribos.py

示例15: SubLocal

# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_uni_attributes [as 别名]

#.........这里部分代码省略.........
            tmp_psortb_path, "_".join([prefix, self.endfix_raw])),
            os.path.join(tmp_psortb_path, "_".join([
                prefix, self.endfix_table])),
            None, None, args_sub.fuzzy)
        log.write("\t" + os.path.join(tmp_psortb_path, "_".join([
            prefix, self.endfix_table])) + " is tempoaray generated.\n")

    def _remove_header(self, out_all):
        out = open(out_all + "_tmp", "w")
        fh = open(out_all, "r")
        out.write("\t".join(["#Genome", "Protein", "Strand", "Start",
                             "End", "Location", "Score"]) + "\n")
        for row in csv.reader(fh, delimiter='\t'):
            if row[0] != "#Genome":
                out.write("\t".join(row) + "\n")
        out.close()
        fh.close()
        shutil.move(out_all + "_tmp", out_all)

    def _merge_and_stat(self, gffs, tmp_psortb_path, stat_path, psortb_result,
                        log):
        """Merge per-genome psortb outputs into one table per genome
        group, deduplicate its header, then run the sublocalization
        statistics and log every generated file.
        """
        for folder in os.listdir(gffs):
            if folder.endswith(".gff_folder"):
                prefix = folder.replace(".gff_folder", "")
                self.helper.check_make_folder(
                     os.path.join(psortb_result, prefix))
                merge_table = os.path.join(
                        psortb_result, prefix,
                        "_".join([prefix, self.endfix_table]))
                for gff in os.listdir(os.path.join(gffs, folder)):
                    # Copy the raw psortb output next to the merged table.
                    result = self.helper.get_correct_file(
                            tmp_psortb_path, "_" + self.endfix_raw,
                            gff.replace(".gff", ""), None, None)
                    shutil.copy(result, os.path.join(psortb_result, prefix))
                    # Append this genome's parsed table to the merged one.
                    result = self.helper.get_correct_file(
                            tmp_psortb_path, "_" + self.endfix_table,
                            gff.replace(".gff", ""), None, None)
                    self.helper.merge_file(result, merge_table)
                log.write("\t" + merge_table + "\n")
                # Merging concatenates one header per source file;
                # collapse them to a single leading header.
                self._remove_header(merge_table)
                self.helper.check_make_folder(os.path.join(stat_path, prefix))
                stat_folder = os.path.join(stat_path, prefix)
                stat_file = os.path.join(stat_folder, "_".join([
                                      "stat", prefix, "sublocal.csv"]))
                stat_sublocal(merge_table,
                              os.path.join(stat_folder, prefix),
                              stat_file)
                for file_ in os.listdir(stat_folder):
                    log.write("\t" + os.path.join(stat_folder, file_) + "\n")

    def _remove_tmps(self, args_sub):
        """Delete all temporary folders/files created during the run."""
        for input_dir in (args_sub.fastas, args_sub.gffs):
            self.helper.remove_tmp_dir(input_dir)
        for parent in (args_sub.out_folder, self.out_all, self.out_express):
            self.helper.remove_all_content(parent, "tmp", "dir")
        os.remove(os.path.join(self.out_all, "tmp_log"))
        if args_sub.trans is not None:
            os.remove(os.path.join(self.out_express, "tmp_log"))
            self.helper.remove_tmp_dir(args_sub.trans)

    def run_sub_local(self, args_sub, log):
        """Entry point: predict protein subcellular localization with
        psortb for all genes and, when transcripts are supplied, also
        for the expressed subset, then merge results and compute stats.
        """
        # Annotation GFFs need unique attributes before parsing.
        for gff in os.listdir(args_sub.gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(
                                                 args_sub.gffs, gff))
        self.multiparser.parser_gff(args_sub.gffs, None)
        self.multiparser.parser_fasta(args_sub.fastas)
        # Transcripts are optional; they enable the "expressed genes" pass.
        if args_sub.trans is not None:
            self.multiparser.parser_gff(args_sub.trans, "transcript")
            self.helper.check_make_folder(self.express_tmp_path)
            self.helper.check_make_folder(self.express_tmp_result)
        self.helper.check_make_folder(self.all_tmp_path)
        self.helper.check_make_folder(self.all_tmp_result)
        for gff in os.listdir(self.gff_path):
            if args_sub.trans is not None:
                # Pass 1 (optional): only genes covered by transcripts.
                print("Running expressed genes now")
                prefix = self._get_protein_seq(gff, self.express_tmp_path,
                                               self.tran_path, args_sub, log)
                self._run_psortb(args_sub, prefix, self.out_express,
                                 self.express_tmp_path,
                                 self.express_tmp_result, log)
                self._extract_result(args_sub, self.express_tmp_result, prefix,
                                     os.path.join(self.gff_path, gff), log)
            # Pass 2 (always): every annotated gene.
            print("Running all genes now")
            prefix = self._get_protein_seq(gff, self.all_tmp_path, None,
                                           args_sub, log)
            self._run_psortb(args_sub, prefix, self.out_all,
                             self.all_tmp_path, self.all_tmp_result, log)
            self._extract_result(args_sub, self.all_tmp_result, prefix,
                                 os.path.join(self.gff_path, gff), log)
        log.write("Running stat_sublocal.py to do statistics, generate "
                  "merged tables, and plot figures.\n")
        log.write("The following files are generated:\n")
        self._merge_and_stat(args_sub.gffs, self.all_tmp_result,
                             self.all_stat_path, self.all_result, log)
        if args_sub.trans is not None:
            self._merge_and_stat(args_sub.gffs, self.express_tmp_result,
                                 self.express_stat_path, self.express_result, log)
        self._remove_tmps(args_sub)
开发者ID:Sung-Huan,项目名称:ANNOgesic,代码行数:104,代码来源:sublocal.py


注:本文中的annogesiclib.helper.Helper.check_uni_attributes方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。