本文整理汇总了Python中annogesiclib.helper.Helper.remove_tmp方法的典型用法代码示例。如果您正苦于以下问题:Python Helper.remove_tmp方法的具体用法?Python Helper.remove_tmp怎么用?Python Helper.remove_tmp使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类annogesiclib.helper.Helper的用法示例。
在下文中一共展示了Helper.remove_tmp方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: Terminator
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import remove_tmp [as 别名]
#.........这里部分代码省略.........
for prefix in prefixs:
tmp_seq = os.path.join(args_term.out_folder,
"_".join(["inter_seq", prefix]))
tmp_sec = os.path.join(args_term.out_folder,
"_".join(["inter_sec", prefix]))
tran_file = os.path.join(self.tran_path,
"_".join([prefix, "transcript.gff"]))
gff_file = os.path.join(merge_path, prefix + ".gff")
print("Extracting seq of {0}".format(prefix))
intergenic_seq(os.path.join(self.fasta_path, prefix + ".fa"),
tran_file, gff_file, tmp_seq)
self._run_rnafold(args_term.RNAfold_path, tmp_seq, tmp_sec, prefix)
tmp_cand = os.path.join(args_term.out_folder,
"_".join(["term_candidates", prefix]))
poly_t(tmp_seq, tmp_sec, gff_file, tran_file, tmp_cand, args_term)
print("detection of terminator")
detect_coverage(
tmp_cand, os.path.join(merge_path, prefix + ".gff"),
os.path.join(self.tran_path, "_".join([
prefix, "transcript.gff"])),
os.path.join(self.fasta_path, prefix + ".fa"),
os.path.join(wig_path, "_".join([prefix, "forward.wig"])),
os.path.join(wig_path, "_".join([prefix, "reverse.wig"])),
os.path.join(self.tmps["hp_path"], "_".join([
prefix, self.tmps["hp_gff"]])), merge_wigs,
os.path.join(self.outfolder["term"], "_".join([
prefix, self.suffixs["gff"]])),
os.path.join(self.tmps["term_table"], "_".join([
prefix, "term_raw.csv"])), args_term)
self.multiparser.combine_gff(args_term.gffs, self.outfolder["term"],
None, "term")
self._move_file(self.outfolder["term"], self.outfolder["csv"])
def _remove_tmp_file(self, merge_wigs, args_term):
self.helper.remove_tmp(args_term.gffs)
self.helper.remove_tmp(args_term.fastas)
if args_term.srnas is not None:
self.helper.remove_tmp(args_term.srnas)
shutil.rmtree(self.tmps["merge"])
if (args_term.tex_wigs is not None) and (
args_term.frag_wigs is not None):
shutil.rmtree(merge_wigs)
self.helper.remove_tmp(args_term.trans)
self.helper.remove_tmp(args_term.tex_wigs)
self.helper.remove_tmp(args_term.frag_wigs)
self.helper.remove_tmp(self.outfolder["term"])
shutil.rmtree(self.tmps["transterm"])
shutil.rmtree(self.tmps["term_table"])
self.helper.remove_all_content(args_term.out_folder,
"inter_seq_", "file")
self.helper.remove_all_content(args_term.out_folder,
"inter_sec_", "file")
self.helper.remove_all_content(args_term.out_folder,
"term_candidates_", "file")
def _compute_stat(self, args_term):
new_prefixs = []
for gff in os.listdir(self.terms["all"]):
if gff.endswith("_term_all.gff"):
out_tmp = open(self.tmps["gff"], "w")
out_tmp.write("##gff-version 3\n")
new_prefix = gff.replace("_term_all.gff", "")
new_prefixs.append(gff.replace("_term_all.gff", ""))
num = 0
fh = open(os.path.join(self.terms["all"], gff))
for entry in self.gff_parser.entries(fh):
示例2: sRNADetection
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import remove_tmp [as 别名]
#.........这里部分代码省略.........
"stat_sRNA_class", prefix + ".csv"]))
classify_srna(os.path.join(self.all_best["all_gff"],
"_".join([prefix, "sRNA.gff"])), class_gff,
out_stat, args_srna)
for srna in os.listdir(class_gff):
out_table = os.path.join(
class_table, srna.replace(".gff", ".csv"))
gen_srna_table(
os.path.join(class_gff, srna),
"_".join([self.prefixs["merge_table"], prefix]),
"_".join([self.tmps["nr"], prefix + ".csv"]),
"_".join([self.tmps["srna"], prefix + ".csv"]),
args_srna, out_table)
def _get_best_result(self, prefixs, args_srna):
    """Produce the "best" sRNA GFF file and table for every prefix.

    For each prefix the complete sRNA GFF file is handed to
    ``gen_best_srna`` together with the target path in the best-GFF
    folder; the resulting file is then passed to ``gen_srna_table`` to
    write the matching table.
    """
    for name in prefixs:
        gff_name = "_".join([name, "sRNA.gff"])
        all_gff = os.path.join(self.all_best["all_gff"], gff_name)
        best_gff = os.path.join(self.all_best["best_gff"], gff_name)
        best_table = os.path.join(self.all_best["best_table"],
                                  "_".join([name, "sRNA.csv"]))
        gen_best_srna(all_gff, best_gff, args_srna)
        gen_srna_table(
            best_gff,
            "_".join([self.prefixs["merge_table"], name]),
            "_".join([self.tmps["nr"], name + ".csv"]),
            "_".join([self.tmps["srna"], name + ".csv"]),
            args_srna, best_table)
def _remove_file(self, args_srna):
self.helper.remove_all_content(args_srna.out_folder, "tmp_", "dir")
self.helper.remove_all_content(args_srna.out_folder, "tmp_", "file")
self.helper.remove_tmp(args_srna.fastas)
self.helper.remove_tmp(args_srna.gffs)
if args_srna.frag_wigs is not None:
self.helper.remove_tmp(args_srna.frag_wigs)
if args_srna.tex_wigs is not None:
self.helper.remove_tmp(args_srna.tex_wigs)
if (args_srna.frag_wigs is not None) and (
args_srna.tex_wigs is not None):
shutil.rmtree(args_srna.merge_wigs)
self.helper.remove_tmp(args_srna.trans)
if args_srna.tss_folder is not None:
self.helper.remove_tmp(args_srna.tss_folder)
if args_srna.pro_folder is not None:
self.helper.remove_tmp(args_srna.pro_folder)
if args_srna.sorf_file is not None:
self.helper.remove_tmp(args_srna.sorf_file)
if "tmp_median" in os.listdir(args_srna.out_folder):
os.remove(os.path.join(args_srna.out_folder, "tmp_median"))
if self.term_path is not None:
self.helper.remove_tmp(args_srna.terms)
def _filter_srna(self, args_srna, prefixs):
if "sec_str" in args_srna.import_info:
self._compute_2d_and_energy(args_srna, prefixs)
if "blast_nr" in args_srna.import_info:
self._blast(args_srna.nr_database, args_srna.nr_format, "prot",
args_srna, prefixs, "blastx", "nr", args_srna.e_nr)
if "blast_srna" in args_srna.import_info:
self._blast(args_srna.srna_database, args_srna.srna_format, "nucl",
args_srna, prefixs, "blastn", "sRNA", args_srna.e_srna)
if "sorf" in args_srna.import_info:
for prefix in prefixs:
if ("_".join([prefix, "sORF.gff"]) in
示例3: TestHelper
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import remove_tmp [as 别名]
class TestHelper(unittest.TestCase):
    """Unit tests for the methods of ``Helper``."""

    def setUp(self):
        """Create a scratch folder holding one GFF file and one FASTA file."""
        self.example = ExampleData()
        self.helper = Helper()
        self.gff_out = self.example.gff_out
        self.rev_seq = self.example.rev_seq.replace("\n", "")
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)
        self.gff_file = os.path.join(self.test_folder, "test.gff")
        with open(self.gff_file, "w") as gff_handle:
            gff_handle.write(self.example.gff_file)
        self.seq_file = os.path.join(self.test_folder, "test.fa")
        with open(self.seq_file, "w") as seq_handle:
            seq_handle.write(self.example.seq)

    def tearDown(self):
        """Remove the scratch folder again."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_remove_all_content(self):
        """Only entries of the requested kind are removed."""
        tmp_file = os.path.join(self.test_folder, "tmp1.gff")
        tmp_dir = os.path.join(self.test_folder, "tmp2")
        shutil.copyfile(self.gff_file, tmp_file)
        os.mkdir(tmp_dir)
        # Removing files must leave the directory untouched ...
        self.helper.remove_all_content(self.test_folder, "tmp", "file")
        self.assertFalse(os.path.exists(tmp_file))
        self.assertTrue(os.path.exists(tmp_dir))
        # ... and removing directories must leave unrelated files alone.
        self.helper.remove_all_content(self.test_folder, "tmp", "dir")
        self.assertFalse(os.path.exists(tmp_dir))
        self.assertTrue(os.path.exists(self.gff_file))

    def test_remove_tmp(self):
        """Both "tmp" and "*_folder" directories disappear."""
        doomed = [os.path.join(self.test_folder, "tmp"),
                  os.path.join(self.test_folder, "test.gff_folder")]
        for path in doomed:
            os.mkdir(path)
        self.helper.remove_tmp(self.test_folder)
        for path in doomed:
            self.assertFalse(os.path.exists(path))

    def test_get_correct_file(self):
        """The GFF file matching the prefix is returned."""
        gff_file = os.path.join(self.test_folder, "test.gff")
        forward_name = "test_forward.wig_STRAIN_aaa.wig"
        reverse_name = "test_reverse.wig_STRAIN_aaa.wig"
        shutil.copyfile(gff_file,
                        os.path.join(self.test_folder, forward_name))
        shutil.copyfile(gff_file,
                        os.path.join(self.test_folder, reverse_name))
        libs = [forward_name + ":frag:1:a:+",
                reverse_name + ":frag:1:a:-"]
        filename = self.helper.get_correct_file(
            self.test_folder, ".gff", "test", None, libs)
        self.assertEqual(filename, gff_file)

    def test_sorf_gff(self):
        """sort_gff writes the expected set of lines."""
        out_file = os.path.join(self.test_folder, "test.out")
        self.helper.sort_gff(self.gff_file, out_file)
        sorted_lines = import_data(out_file)
        self.assertEqual(set(sorted_lines),
                         set(self.gff_out.split("\n")))

    def test_extract_gene(self):
        """Sequences are extracted correctly from both strands."""
        seq = self.example.seq.replace("\n", "")
        forward = self.helper.extract_gene(seq, 1, 70, "+")
        self.assertEqual(
            forward,
            "CGCAGGTTGAGTTCCTGTTCCCGATAGATCCGATAAACCCGCTTATGATTCCAGAGCTGTCCCTGCACAT")
        reverse = self.helper.extract_gene(seq, 1, 140, "-")
        self.assertEqual(reverse, self.rev_seq)

    def test_get_seq(self):
        """get_seq writes the FASTA record of the single GFF entry."""
        gff_file = os.path.join(self.test_folder, "test.gff")
        out_file = os.path.join(self.test_folder, "test.cds")
        gff_lines = self.example.gff_out.split("\n")
        # Overwrite the fixture so it holds exactly one entry.
        with open(gff_file, "w") as gff_handle:
            gff_handle.write(gff_lines[1])
        self.helper.get_seq(self.gff_file, self.seq_file, out_file)
        records = import_data(out_file)
        self.assertEqual(set(records),
                         {">cds0|aaa|1|10|+", "CGCAGGTTGA"})
示例4: TranscriptDetection
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import remove_tmp [as 别名]
#.........这里部分代码省略.........
fill_gap(os.path.join(args_tran.gffs, gff),
os.path.join(self.tran_path,
"_".join([ta, self.endfix_tran])),
"overlap", self.tmps["overlap"], args_tran.modify)
fill_gap(os.path.join(args_tran.gffs, gff),
os.path.join(self.tran_path,
"_".join([ta, self.endfix_tran])),
"uni", self.tmps["uni"], args_tran.modify)
tmp_merge = os.path.join(self.gff_outfolder, self.tmps["merge"])
if self.tmps["merge"] in self.gff_outfolder:
os.remove(tmp_merge)
self.helper.merge_file(self.tmps["overlap"], tmp_merge)
self.helper.merge_file(self.tmps["uni"], tmp_merge)
tmp_out = os.path.join(self.gff_outfolder, "_".join(["tmp", ta]))
self.helper.sort_gff(tmp_merge, tmp_out)
os.remove(self.tmps["overlap"])
os.remove(self.tmps["uni"])
os.remove(tmp_merge)
final_out = os.path.join(self.gff_outfolder,
"_".join(["final", ta]))
longer_ta(tmp_out, args_tran.length, final_out)
shutil.move(final_out,
os.path.join(self.tmps["tran"],
"_".join([ta, self.endfix_tran])))
os.remove(tmp_out)
shutil.rmtree(self.gff_outfolder)
shutil.move(self.tmps["tran"], self.gff_outfolder)
def _remove_file(self, args_tran):
if "tmp_wig" in os.listdir(args_tran.out_folder):
shutil.rmtree(os.path.join(args_tran.out_folder, "tmp_wig"))
if "merge_wigs" in os.listdir(args_tran.out_folder):
shutil.rmtree(os.path.join(args_tran.out_folder, "merge_wigs"))
self.helper.remove_tmp_dir(args_tran.gffs)
self.helper.remove_tmp_dir(args_tran.compare_tss)
self.helper.remove_tmp_dir(args_tran.terms)
self.helper.remove_tmp(os.path.join(args_tran.out_folder, "gffs"))
self.helper.remove_tmp(self.gff_outfolder)
def _compare_term_tran(self, args_tran, log):
'''searching the associated terminator to transcript'''
if args_tran.terms is not None:
print("Comparing between terminators and transcripts")
self.multiparser.parser_gff(args_tran.terms, "term")
if args_tran.gffs is not None:
self.multiparser.combine_gff(
args_tran.gffs,
os.path.join(args_tran.terms, "tmp"), None, "term")
log.write("Running compare_tran_term.py to compare transcripts "
"with terminators.\n")
compare_term_tran(self.gff_outfolder,
os.path.join(args_tran.terms, "tmp"),
args_tran.fuzzy_term, args_tran.fuzzy_term,
args_tran.out_folder, "transcript",
args_tran.terms, self.gff_outfolder)
for file_ in os.listdir(os.path.join(args_tran.out_folder, "statistics")):
if file_.startswith("stat_compare_transcript_terminator_"):
log.write("\t" + file_ + " is generated.\n")
def _re_table(self, args_tran, log):
log.write("Running re_table.py to generate coverage information.\n")
log.write("The following files are updated:\n")
for gff in os.listdir(self.gff_outfolder):
if os.path.isfile(os.path.join(self.gff_outfolder, gff)):
tran_table = os.path.join(args_tran.out_folder, "tables",
gff.replace(".gff", ".csv"))
示例5: SNPCalling
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import remove_tmp [as 别名]
#.........这里部分代码省略.........
elif fasta.endswith(".fasta"):
prefix = fasta[:-6]
detect = True
return (detect, prefix)
def _run_bam(self, samtools_path, sub_command, bam_file):
if sub_command == "merge":
command = (" ".join([samtools_path, sub_command,
self.bams["whole"], bam_file]))
elif sub_command == "sort":
command = (" ".join([samtools_path, sub_command,
"-o", bam_file, self.bams["whole"]]))
os.system(command)
def _merge_bams(self, args_snp):
bams = []
num_normal = 0
num_frag = 0
if (args_snp.frag_bams is None) and (args_snp.normal_bams is None):
print("Error: There is no BAMs folders!!")
sys.exit()
else:
if args_snp.normal_bams is not None:
num_normal = self._import_bam(args_snp.normal_bams, bams)
if args_snp.frag_bams is not None:
num_frag = self._import_bam(args_snp.frag_bams, bams)
num_bam = num_normal + num_frag
if num_bam <= 1:
shutil.copyfile(bams[0], self.bams["whole"])
print("Sort BAM file now ...")
self._run_bam(args_snp.samtools_path, "sort",
self.bams["sort"])
else:
print("Merge BAM files now ...")
self._run_bam(args_snp.samtools_path, "merge", " ".join(bams))
print("Sort BAM file now ...")
self._run_bam(args_snp.samtools_path, "sort",
self.bams["sort"])
return num_bam
def _modify_header(self, fastas):
for fasta in os.listdir(fastas):
if fasta.endswith("fasta") or \
fasta.endswith("fa") or \
fasta.endswith("fna"):
self.seq_editer.modify_header(os.path.join(fastas, fasta))
def _get_header(self, samtools_path):
command = " ".join([samtools_path, "view", "-H", self.bams["sort"]])
os.system(">".join([command, self.header]))
def _get_genome_name(self, samtools_path):
self._get_header(samtools_path)
fh = open(self.header, "r")
seq_names = []
for row in csv.reader(fh, delimiter="\t"):
if row[0] == "@SQ":
seq_names.append(row[1].split(":")[1])
fh.close()
return seq_names
def run_snp_calling(self, args_snp):
    """Run SNP calling for every genome present in the BAM header.

    The FASTA files are parsed, the BAM files are merged and sorted,
    and ``self._run_program`` is called for each FASTA file whose name
    matches a sequence of the BAM header.  Temporary files are removed
    at the end.

    Args:
        args_snp: Argument container; ``program``, ``fastas`` and
            ``samtools_path`` are read here.

    Exits the program when ``args_snp.program`` contains none of
    "1", "2" and "3".
    """
    # Validate first so a wrong BAQ type does not cost a full BAM
    # merge/sort and leave temporary files behind.
    if not any(baq in args_snp.program for baq in ("1", "2", "3")):
        print("Error:Please assign a correct BAQ type: "
              "'1' means 'with_BAQ', '2' means 'without_BAQ' or "
              "'3' means 'extend_BAQ'.")
        sys.exit()
    self.multiparser.parser_fasta(args_snp.fastas)
    self._modify_header(args_snp.fastas)
    bam_number = self._merge_bams(args_snp)
    seq_names = self._get_genome_name(args_snp.samtools_path)
    for fasta in os.listdir(self.fasta_path):
        if fasta.split(".f")[0] not in seq_names:
            continue
        detect, prefix = self._detect_fasta(fasta)
        if not detect:
            continue
        print("Computing {0} now ...".format(fasta))
        table_path = os.path.join(self.outputs["table"], prefix)
        raw_path = os.path.join(self.outputs["raw"], prefix)
        self.helper.check_make_folder(table_path)
        self.helper.check_make_folder(raw_path)
        file_prefixs = {"raw_prefix": os.path.join(raw_path, prefix),
                        "table_prefix": os.path.join(table_path, prefix)}
        fasta_file = os.path.join(self.fasta_path, fasta)
        self._run_program(fasta_file, file_prefixs, prefix,
                          bam_number, table_path, args_snp)
        # NOTE(review): assumes the tmp file is written by each
        # _run_program call - confirm against the original indentation.
        os.remove(self.outputs["tmp"])
    self.helper.remove_tmp(args_snp.fastas)
    os.remove(self.bams["whole"])
    os.remove(self.bams["sort"])
    os.remove(self.header)
示例6: OperonDetection
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import remove_tmp [as 别名]
#.........这里部分代码省略.........
self.term_path = os.path.join(args_op.terms, "tmp")
else:
self.term_path = None
def _check_gff(self, gffs, type_):
for gff in os.listdir(gffs):
if gff.endswith(".gff"):
self.helper.check_uni_attributes(os.path.join(gffs, gff))
def _detect_operon(self, prefixs, args_op):
    """Call ``operon`` for every prefix and write one table per prefix.

    The TSS, transcript, annotation and (optional) terminator files
    are located with ``helper.get_correct_file``; ``False`` is passed
    as terminator file when no terminator folder is available.
    """
    for name in prefixs:
        table_name = "_".join(["operon", name + ".csv"])
        out_table = os.path.join(self.table_path, table_name)
        print("Detection operons of {0}".format(name))
        tss = self.helper.get_correct_file(
            self.tss_path, "_TSS.gff", name, None, None)
        tran = self.helper.get_correct_file(
            self.tran_path, "_transcript.gff", name, None, None)
        gff = self.helper.get_correct_file(
            args_op.gffs, ".gff", name, None, None)
        term = False
        if self.term_path is not None:
            term = self.helper.get_correct_file(
                self.term_path, "_term.gff", name, None, None)
        operon(tran, tss, gff, term, args_op.tss_fuzzy,
               args_op.term_fuzzy, args_op.length, out_table)
def _check_and_parser_gff(self, args_op):
self._check_gff(args_op.tsss, "tss")
self._check_gff(args_op.gffs, "gff")
self._check_gff(args_op.trans, "tran")
self._check_gff(args_op.utr5s, "utr")
self._check_gff(args_op.utr3s, "utr")
self.multiparser.parser_gff(args_op.gffs, None)
self.multiparser.parser_gff(args_op.tsss, "TSS")
self.multiparser.combine_gff(args_op.gffs, self.tss_path, None, "TSS")
self.multiparser.parser_gff(args_op.trans, "transcript")
self.multiparser.combine_gff(args_op.gffs, self.tran_path,
None, "transcript")
self.multiparser.parser_gff(args_op.utr5s, "5UTR")
self.multiparser.combine_gff(args_op.gffs, self.utr5_path,
None, "5UTR")
self.multiparser.parser_gff(args_op.utr3s, "3UTR")
self.multiparser.combine_gff(args_op.gffs, self.utr3_path,
None, "3UTR")
if args_op.terms is not None:
self._check_gff(args_op.terms, "term")
self.multiparser.parser_gff(args_op.terms, "term")
self.multiparser.combine_gff(args_op.gffs, self.term_path,
None, "term")
def _stat(self, table_path, stat_folder):
    """Call ``stat`` on every "operon_*.csv" table in *table_path*.

    The output file is placed in *stat_folder* and named after the
    table with a "stat_" prefix.
    """
    for table in os.listdir(table_path):
        if not (table.startswith("operon_") and table.endswith(".csv")):
            continue
        out_stat = os.path.join(stat_folder, "_".join(["stat", table]))
        stat(os.path.join(table_path, table), out_stat)
def _combine_gff(self, prefixs, args_op):
    """Call ``combine_gff`` for every prefix to write "*_all_features.gff".

    ``None`` is passed as terminator file when no terminator folder is
    available.
    """
    gff_folder = os.path.join(args_op.output_folder, "gffs")
    for name in prefixs:
        out_file = os.path.join(
            gff_folder, "_".join([name, "all_features.gff"]))
        print("Combine all features of {0}".format(name))
        tss = self.helper.get_correct_file(
            self.tss_path, "_TSS.gff", name, None, None)
        tran = self.helper.get_correct_file(
            self.tran_path, "_transcript.gff", name, None, None)
        gff = self.helper.get_correct_file(
            args_op.gffs, ".gff", name, None, None)
        utr5 = self.helper.get_correct_file(
            self.utr5_path, "_5UTR.gff", name, None, None)
        utr3 = self.helper.get_correct_file(
            self.utr3_path, "_3UTR.gff", name, None, None)
        term = None
        if self.term_path is not None:
            term = self.helper.get_correct_file(
                self.term_path, "_term.gff", name, None, None)
        combine_gff(gff, tran, tss, utr5, utr3, term,
                    args_op.tss_fuzzy, args_op.term_fuzzy, out_file)
def run_operon(self, args_op):
    """Entry point of operon detection.

    Parses the inputs, detects operons for every ".gff" file of
    ``args_op.gffs``, optionally computes statistics and combines the
    features, and finally removes the temporary folders.
    """
    self._check_and_parser_gff(args_op)
    prefixs = [gff.replace(".gff", "")
               for gff in os.listdir(args_op.gffs)
               if gff.endswith(".gff")]
    self._detect_operon(prefixs, args_op)
    if args_op.statistics:
        self._stat(self.table_path, args_op.stat_folder)
    if args_op.combine:
        self._combine_gff(prefixs, args_op)
    for attr in ("gffs", "utr3s", "utr5s", "tsss", "trans"):
        self.helper.remove_tmp(getattr(args_op, attr))
    if args_op.terms is not None:
        self.helper.remove_tmp(args_op.terms)
示例7: Ribos
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import remove_tmp [as 别名]
#.........这里部分代码省略.........
regenerate_seq(first_scan_file, first_seq,
first_table, sec_seq)
print("scanning of {0}".format(prefix))
sec_scan_file = self._run_infernal(args_ribo, sec_seq,
"re_txt", prefix)
sec_table = os.path.join(
self.tmp_files["table"],
"_".join([prefix, self.suffixs["re_csv"]]))
reextract_rbs(sec_scan_file, first_table, sec_table)
shutil.move(sec_table, first_table)
modify_table(first_table, args_ribo.output_all)
return prefixs
def _merge_results(self, args_ribo):
    """Merge the per-sequence riboswitch results of every GFF file.

    For each ".gff" file in ``args_ribo.gffs`` the tables of all
    sequences found in that file are merged into one table, the scan
    outputs are copied into a folder named after the file, and
    ``stat_and_covert2gff`` is run on the merged table.

    Args:
        args_ribo: Argument container; ``gffs``, ``ribos_id`` and
            ``fuzzy`` are read.
    """
    for gff in os.listdir(args_ribo.gffs):
        if not gff.endswith(".gff"):
            continue
        prefix = gff.replace(".gff", "")
        print("Merge results of {0}".format(prefix))
        scan_out = os.path.join(self.scan_folder, prefix)
        self.helper.check_make_folder(scan_out)
        merged_table = os.path.join(
            self.table_folder, "_".join([prefix, self.suffixs["csv"]]))
        pre_strain = ""
        # The context manager closes the GFF file even when copying or
        # merging fails part-way through.
        with open(os.path.join(args_ribo.gffs, gff)) as fh:
            for entry in self.gff_parser.entries(fh):
                if entry.seq_id != pre_strain:
                    strain_table = os.path.join(
                        self.tmp_files["table"],
                        "_".join([entry.seq_id, self.suffixs["csv"]]))
                    if len(pre_strain) == 0:
                        # The first sequence starts the merged table ...
                        shutil.copyfile(strain_table, merged_table)
                    else:
                        # ... later sequences are appended to it.
                        self.helper.merge_file(strain_table, merged_table)
                    for suffix_key in ("txt", "re_txt"):
                        shutil.copy(
                            os.path.join(
                                self.tmp_files["scan"],
                                "_".join([entry.seq_id,
                                          self.suffixs[suffix_key]])),
                            scan_out)
                pre_strain = entry.seq_id
        out_stat = os.path.join(
            self.stat_folder,
            "_".join(["stat", prefix, "riboswitch.txt"]))
        print("compute statistics of {0}".format(prefix))
        stat_and_covert2gff(
            merged_table, args_ribo.ribos_id,
            os.path.join(self.gff_outfolder,
                         "_".join([prefix, "riboswitch.gff"])),
            args_ribo.fuzzy, out_stat)
def _remove_tmp(self, args_ribo):
self.helper.remove_tmp(args_ribo.gffs)
self.helper.remove_tmp(args_ribo.fastas)
self.helper.remove_all_content(args_ribo.out_folder, "tmp", "dir")
def _remove_overlap(self, gff_path):
    """Call ``rbs_overlap`` for every ".gff" file in *gff_path*.

    Each call receives the matching table from the temporary table
    folder and the GFF file itself.
    """
    gff_names = [name for name in os.listdir(gff_path)
                 if name.endswith(".gff")]
    for name in gff_names:
        table_name = "_".join([name.replace(".gff", ""),
                               self.suffixs["csv"]])
        table = os.path.join(self.tmp_files["table"], table_name)
        rbs_overlap(table, os.path.join(gff_path, name))
def run_ribos(self, args_ribo):
    """Entry point of riboswitch detection.

    Exits the program when ``args_ribo.fuzzy_rbs`` is larger than 6.
    Otherwise the inputs are parsed and checked, the Rfam file is
    prepared and compressed with ``cmpress``, and the scan, overlap
    removal, merging, mapping and clean-up steps run in that order.
    """
    if args_ribo.fuzzy_rbs > 6:
        print("Error: --fuzzy_rbs should be equal or less than 6!!")
        sys.exit()
    self.multiparser.parser_gff(args_ribo.gffs, None)
    self.multiparser.parser_fasta(args_ribo.fastas)
    for folder, feature in ((args_ribo.trans, "transcript"),
                            (args_ribo.tsss, "TSS")):
        self.multiparser.parser_gff(folder, feature)
    gff_names = [name for name in os.listdir(args_ribo.gffs)
                 if name.endswith(".gff")]
    for name in gff_names:
        self.helper.check_uni_attributes(
            os.path.join(args_ribo.gffs, name))
    rbs_from_rfam(args_ribo.ribos_id, args_ribo.rfam, self.ribos_rfam)
    print("compressing Rfam...")
    cmpress = os.path.join(args_ribo.infernal_path, "cmpress")
    call([cmpress, "-F", self.ribos_rfam])
    for key in ("fasta", "scan", "table"):
        self.helper.check_make_folder(self.tmp_files[key])
    prefixs = self._scan_extract_rfam([], args_ribo)
    self._remove_overlap(self.gff_path)
    self._merge_results(args_ribo)
    mapping_ribos(self.table_folder, args_ribo.ribos_id)
    self._remove_tmp(args_ribo)
示例8: SubLocal
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import remove_tmp [as 别名]
#.........这里部分代码省略.........
self._psortb(args_sub.psortb_path, "-p", prot_seq_file,
out_raw, out_err)
elif args_sub.gram == "negative":
self._psortb(args_sub.psortb_path, "-n", prot_seq_file,
out_raw, out_err)
else:
print("Error:It is not a proper bacteria type - {0}!!".format(
args_sub.gram))
sys.exit()
out_err.close()
out_raw.close()
def _extract_result(self, args_sub, tmp_psortb_path, prefix, gff_file):
    """Call ``extract_psortb`` on the raw Psortb result of *prefix*.

    With ``args_sub.merge`` set, *gff_file* is passed as well, the
    output "<prefix>.gff" is written to the working directory and then
    moved over *gff_file*.  Otherwise ``None`` is passed for both GFF
    arguments.
    """
    raw_result = os.path.join(
        tmp_psortb_path, "_".join([prefix, self.endfix_raw]))
    table = os.path.join(
        tmp_psortb_path, "_".join([prefix, self.endfix_table]))
    if not args_sub.merge:
        extract_psortb(raw_result, table, None, None, args_sub.fuzzy)
        return
    print("Merge to gff...")
    merged_gff = prefix + ".gff"
    extract_psortb(raw_result, table, gff_file, merged_gff,
                   args_sub.fuzzy)
    shutil.move(merged_gff, gff_file)
def _merge_and_stat(self, gffs, tmp_psortb_path, stat_path, psortb_result):
    """Merge the Psortb results per input file and compute statistics.

    For every "*.gff_folder" directory in *gffs* the raw results of its
    members are copied into ``psortb_result/<prefix>``, their tables
    are merged into one table, and ``stat_sublocal`` is run on it.
    """
    for folder in os.listdir(gffs):
        if not folder.endswith(".gff_folder"):
            continue
        prefix = folder.replace(".gff_folder", "")
        result_folder = os.path.join(psortb_result, prefix)
        self.helper.check_make_folder(result_folder)
        merge_table = os.path.join(
            result_folder, "_".join([prefix, self.endfix_table]))
        for gff in os.listdir(os.path.join(gffs, folder)):
            strain = gff.replace(".gff", "")
            raw_result = self.helper.get_correct_file(
                tmp_psortb_path, "_" + self.endfix_raw,
                strain, None, None)
            shutil.copy(raw_result, result_folder)
            table = self.helper.get_correct_file(
                tmp_psortb_path, "_" + self.endfix_table,
                strain, None, None)
            self.helper.merge_file(table, merge_table)
        stat_folder = os.path.join(stat_path, prefix)
        self.helper.check_make_folder(stat_folder)
        stat_sublocal(
            merge_table,
            os.path.join(stat_folder, prefix),
            os.path.join(stat_folder,
                         "_".join(["stat", prefix, "sublocal.csv"])))
def _remove_tmps(self, args_sub):
self.helper.remove_tmp(args_sub.fastas)
self.helper.remove_tmp(args_sub.gffs)
self.helper.remove_all_content(args_sub.out_folder, "tmp", "dir")
self.helper.remove_all_content(self.out_all, "tmp", "dir")
self.helper.remove_all_content(self.out_express, "tmp", "dir")
os.remove(os.path.join(self.out_all, "tmp_log"))
if args_sub.trans is not None:
os.remove(os.path.join(self.out_express, "tmp_log"))
def run_sub_local(self, args_sub):
    """Entry point of the subcellular-localization prediction.

    Checks and parses the inputs, runs Psortb for every parsed GFF
    file (for expressed genes too when ``args_sub.trans`` is set),
    merges the results and removes the temporary data.
    """
    with_trans = args_sub.trans is not None
    gff_names = [name for name in os.listdir(args_sub.gffs)
                 if name.endswith(".gff")]
    for name in gff_names:
        self.helper.check_uni_attributes(
            os.path.join(args_sub.gffs, name))
    self.multiparser.parser_gff(args_sub.gffs, None)
    self.multiparser.parser_fasta(args_sub.fastas)
    if with_trans:
        self.multiparser.parser_gff(args_sub.trans, "transcript")
        self.helper.check_make_folder(self.express_tmp_path)
        self.helper.check_make_folder(self.express_tmp_result)
    self.helper.check_make_folder(self.all_tmp_path)
    self.helper.check_make_folder(self.all_tmp_result)
    for gff in os.listdir(self.gff_path):
        gff_file = os.path.join(self.gff_path, gff)
        if with_trans:
            print("Running expressed gene now...")
            prefix = self._get_protein_seq(gff, self.express_tmp_path,
                                           self.tran_path)
            self._run_psortb(args_sub, prefix, self.out_express,
                             self.express_tmp_path,
                             self.express_tmp_result)
            self._extract_result(args_sub, self.express_tmp_result,
                                 prefix, gff_file)
        print("Running all gene now...")
        prefix = self._get_protein_seq(gff, self.all_tmp_path, None)
        self._run_psortb(args_sub, prefix, self.out_all,
                         self.all_tmp_path, self.all_tmp_result)
        self._extract_result(args_sub, self.all_tmp_result, prefix,
                             gff_file)
    self._merge_and_stat(args_sub.gffs, self.all_tmp_result,
                         self.all_stat_path, self.all_result)
    if with_trans:
        self._merge_and_stat(args_sub.gffs, self.express_tmp_result,
                             self.express_stat_path, self.express_result)
    self._remove_tmps(args_sub)
示例9: Terminator
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import remove_tmp [as 别名]
#.........这里部分代码省略.........
"potential sequences.\n")
self._run_rnafold(args_term.RNAfold_path, tmp_seq, tmp_sec,
prefix, log)
log.write("Running extract_sec_info.py to extract the "
"information of secondary structure from {0}.\n".format(
prefix))
extract_info_sec(tmp_sec, tmp_seq, tmp_index)
os.remove(tmp_index)
log.write("Running get_polyT.py to detect the "
"terminator candidates for {0}.\n".format(prefix))
poly_t(tmp_seq, tmp_sec, gff_file, tran_file, tmp_cand, args_term)
log.write("\t" + tmp_cand + " which temporary stores terminator "
"candidates is generated.\n")
print("Detecting terminators for " + prefix)
log.write("Running detect_coverage_term.py to gain "
"high-confidence terminators for {0}.\n".format(prefix))
detect_coverage(
tmp_cand, os.path.join(merge_path, prefix + ".gff"),
os.path.join(self.tran_path, "_".join([
prefix, "transcript.gff"])),
os.path.join(self.fasta_path, prefix + ".fa"),
os.path.join(wig_path, "_".join([prefix, "forward.wig"])),
os.path.join(wig_path, "_".join([prefix, "reverse.wig"])),
os.path.join(self.tmps["hp_path"], "_".join([
prefix, self.tmps["hp_gff"]])), merge_wigs,
os.path.join(self.outfolder["term"], "_".join([
prefix, self.suffixs["gff"]])),
os.path.join(self.tmps["term_table"], "_".join([
prefix, "term_raw.csv"])), args_term)
self.multiparser.combine_gff(args_term.gffs, self.outfolder["term"],
None, "term")
self._move_file(self.outfolder["term"], self.outfolder["csv"])
def _remove_tmp_file(self, merge_wigs, args_term):
self.helper.remove_tmp_dir(args_term.gffs)
self.helper.remove_tmp_dir(args_term.fastas)
if args_term.srnas is not None:
self.helper.remove_tmp(args_term.srnas)
shutil.rmtree(self.tmps["merge"])
if (args_term.tex_wigs is not None) and (
args_term.frag_wigs is not None):
shutil.rmtree(merge_wigs)
self.helper.remove_tmp_dir(args_term.trans)
if "tmp_wig" in os.listdir(args_term.out_folder):
shutil.rmtree(os.path.join(args_term.out_folder, "tmp_wig"))
self.helper.remove_tmp(self.outfolder["term"])
shutil.rmtree(self.tmps["transterm"])
shutil.rmtree(self.tmps["term_table"])
self.helper.remove_all_content(args_term.out_folder,
"inter_seq_", "file")
self.helper.remove_all_content(self.outfolder["term"],
"_term.gff", "file")
self.helper.remove_all_content(args_term.out_folder,
"inter_sec_", "file")
self.helper.remove_all_content(args_term.out_folder,
"term_candidates_", "file")
def _compute_stat(self, args_term, log):
new_prefixs = []
for gff in os.listdir(self.terms["all"]):
if gff.endswith("_term_all.gff"):
out_tmp = open(self.tmps["gff"], "w")
out_tmp.write("##gff-version 3\n")
new_prefix = gff.replace("_term_all.gff", "")
new_prefixs.append(gff.replace("_term_all.gff", ""))
num = 0
示例10: MEME
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import remove_tmp [as 别名]
#.........这里部分代码省略.........
self.helper.merge_file(os.path.join(
self.tss_path, tss), self.all_tss)
for fasta in os.listdir(args_pro.fastas):
if (fasta.endswith(".fa")) or (
fasta.endswith(".fna")) or (
fasta.endswith(".fasta")):
self.helper.merge_file(os.path.join(
args_pro.fastas, fasta), self.all_fasta)
else:
for tss in os.listdir(os.path.join(
args_pro.output_folder, "TSS_class")):
if tss.endswith("_TSS.gff"):
self.helper.merge_file(os.path.join(
self.tss_path, tss), self.all_tss)
for fasta in os.listdir(args_pro.fastas):
if (fasta.endswith(".fa")) or (
fasta.endswith(".fna")) or (
fasta.endswith(".fasta")):
self.helper.merge_file(os.path.join(
args_pro.fastas, fasta), self.all_fasta)
print("generating fasta file of all fasta files")
prefixs.append("allfasta")
input_path = os.path.join(self.out_fasta, "allfasta")
self.helper.check_make_folder(os.path.join(
args_pro.output_folder, "allfasta"))
self.helper.check_make_folder(os.path.join(
self.out_fasta, "allfasta"))
args_pro.source = True
upstream(self.all_tss, self.all_fasta, None,
None, args_pro)
self._move_and_merge_fasta(input_path, "allfasta")
def _remove_files(self, args_pro):
self.helper.remove_tmp(args_pro.fastas)
self.helper.remove_tmp(args_pro.tsss)
self.helper.remove_tmp(args_pro.gffs)
self.helper.remove_tmp(args_pro.wigs)
if "allfasta.fa" in os.listdir(args_pro.fastas):
os.remove(self.all_fasta)
if "allfasta" in os.listdir(os.getcwd()):
shutil.rmtree("allfasta")
shutil.rmtree("tmp")
def _gen_table(self, output_folder, prefixs, combine):
    """Call ``gen_promoter_table`` for every result folder of every strain.

    With *combine* set, the "allfasta" pseudo-strain is processed in
    addition to *prefixs*.  Each "meme.txt" is converted to a
    "meme.csv" in the same folder.
    """
    strains = prefixs + ["allfasta"] if combine else prefixs
    for strain in strains:
        strain_folder = os.path.join(output_folder, strain)
        for folder in os.listdir(strain_folder):
            tss_file = os.path.join(self.tss_path, strain + "_TSS.gff")
            result_folder = os.path.join(output_folder, strain, folder)
            gen_promoter_table(os.path.join(result_folder, "meme.txt"),
                               os.path.join(result_folder, "meme.csv"),
                               tss_file)
def _get_upstream(self, args_pro, prefix, tss, fasta):
if args_pro.source:
print("generating fasta file of {0}".format(prefix))
upstream(os.path.join(self.tss_path, tss),
os.path.join(args_pro.fastas, fasta),
None, None, args_pro)
else:
if (args_pro.gffs is None) or (
args_pro.wigs is None) or (
args_pro.input_libs is None):
示例11: sRNATargetPrediction
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import remove_tmp [as 别名]
#.........这里部分代码省略.........
if "_".join([prefix, "RNAup.txt"]) in \
os.listdir(os.path.join(self.rnaup_path, prefix)):
if not args_tar.continue_rnaup:
os.remove(out_rnaup)
os.remove(out_log)
else:
srnas = self._get_continue(out_rnaup)
with open(os.path.join(self.srna_seq_path, "_".join([
self.tmps["tmp"], prefix, "sRNA.fa"])), "r") as s_f:
for line in s_f:
line = line.strip()
if line.startswith(">"):
if line[1:] in srnas:
start = False
continue
start = True
print("Running RNAup with {0}".format(line[1:]))
num_up += 1
out_up = open(os.path.join(args_tar.out_folder,
"".join([self.tmps["tmp"],
str(num_up), ".fa"])), "w")
out_up.write(line + "\n")
else:
if start:
out_up.write(line + "\n")
out_up.close()
self.helper.merge_file(os.path.join(
self.target_seq_path,
"_".join([prefix, "target.fa"])),
os.path.join(args_tar.out_folder,
"".join([self.tmps["tmp"],
str(num_up), ".fa"])))
if num_up == args_tar.core_up:
self._run_rnaup(num_up, processes,
out_rnaup, out_log, args_tar)
processes = []
num_up = 0
self._run_rnaup(num_up, processes, out_rnaup, out_log, args_tar)
def _merge_rnaplex_rnaup(self, prefixs, args_tar):
    """Hand the per-strain RNAplex / RNAup results to ``merge_srna_target``.

    For every strain a folder is prepared under ``self.merge_path``.
    Input and ranking paths of a program that was not selected through
    ``args_tar.program`` are passed on as ``None``.

    Args:
        prefixs: Iterable of strain names.
        args_tar: Argument container; ``program`` is read here and the
            whole object is forwarded to ``merge_srna_target``.
    """
    for prefix in prefixs:
        merge_dir = os.path.join(self.merge_path, prefix)
        self.helper.check_make_folder(merge_dir)
        print("Ranking {0} now...".format(prefix))
        rnaplex_file = out_rnaplex = None
        rnaup_file = out_rnaup = None
        if args_tar.program in ("both", "RNAplex"):
            plex_dir = os.path.join(self.rnaplex_path, prefix)
            rnaplex_file = os.path.join(
                plex_dir, "_".join((prefix, "RNAplex.txt")))
            out_rnaplex = os.path.join(
                plex_dir, "_".join((prefix, "RNAplex_rank.csv")))
        if args_tar.program in ("both", "RNAup"):
            up_dir = os.path.join(self.rnaup_path, prefix)
            rnaup_file = os.path.join(
                up_dir, "_".join((prefix, "RNAup.txt")))
            out_rnaup = os.path.join(
                up_dir, "_".join((prefix, "RNAup_rank.csv")))
        merge_srna_target(
            rnaplex_file, rnaup_file, args_tar, out_rnaplex, out_rnaup,
            os.path.join(merge_dir, "_".join((prefix, "merge.csv"))),
            os.path.join(merge_dir, "_".join((prefix, "overlap.csv"))),
            os.path.join(self.srna_path, "_".join((prefix, "sRNA.gff"))),
            os.path.join(self.gff_path, prefix + ".gff"))
def run_srna_target_prediction(self, args_tar):
    """Run the sRNA target prediction workflow from parsing to clean-up.

    Steps, in order: validate the annotation and sRNA GFF folders, parse
    GFF/FASTA inputs, generate the sequences, run RNAplex and/or RNAup
    according to ``args_tar.program`` ("both", "RNAplex" or "RNAup"),
    merge the results and finally delete temporary files and folders.

    Args:
        args_tar: Argument container providing ``gffs``, ``srnas``,
            ``fastas``, ``program`` and ``out_folder``.
    """
    self._check_gff(args_tar.gffs)
    self._check_gff(args_tar.srnas)
    parser = self.multiparser
    parser.parser_gff(args_tar.gffs, None)
    parser.parser_fasta(args_tar.fastas)
    parser.parser_gff(args_tar.srnas, "sRNA")
    prefixs = []
    # presumably _gen_seq fills `prefixs` in place -- verify in _gen_seq
    self._gen_seq(prefixs, args_tar)
    use_rnaplex = args_tar.program in ("both", "RNAplex")
    use_rnaup = args_tar.program in ("both", "RNAup")
    if use_rnaplex:
        self._rna_plex(prefixs, args_tar)
    self.helper.remove_all_content(self.target_seq_path,
                                   "_target_", "file")
    if use_rnaup:
        self._rnaup(prefixs, args_tar)
    self._merge_rnaplex_rnaup(prefixs, args_tar)
    if use_rnaplex:
        rnaplex_root = os.path.join(args_tar.out_folder, "RNAplex")
        for strain in os.listdir(rnaplex_root):
            shutil.rmtree(os.path.join(rnaplex_root, strain, "RNAplfold"))
    for content_type in ("dir", "file"):
        self.helper.remove_all_content(args_tar.out_folder,
                                       self.tmps["tmp"], content_type)
    for attr in ("gffs", "srnas", "fastas"):
        self.helper.remove_tmp(getattr(args_tar, attr))
    self.helper.remove_all_content(self.srna_seq_path, "tmp_", "file")
示例12: CircRNADetection
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import remove_tmp [as 别名]
#.........这里部分代码省略.........
for prefix in tmp_prefixs:
self.helper.check_make_folder(os.path.join(self.gff_folder,
prefix))
shutil.copytree(prefix, os.path.join(self.splice_path, prefix))
self.helper.check_make_folder(os.path.join(
self.candidate_path, prefix))
print("comparing with annotation of {0}".format(prefix))
if self.splices["all_file"] in os.listdir(os.path.join(
self.splice_path, prefix)):
detect_circrna(os.path.join(self.splice_path, prefix,
self.splices["all_file"]), os.path.join(
self.gff_path, prefix + ".gff"),
os.path.join(self.candidate_path, prefix,
"_".join(["circRNA", prefix + "_all.csv"])),
args_circ, os.path.join(args_circ.stat_folder,
"_".join(["stat_circRNA", prefix + ".csv"])))
self.converter.convert_circ2gff(
os.path.join(self.candidate_path, prefix,
"_".join(["circRNA",
prefix + "_all.csv"])),
args_circ, os.path.join(
self.gff_folder, prefix,
"_".join([prefix, "circRNA_all.gff"])),
os.path.join(self.gff_folder, prefix,
"_".join([prefix, "circRNA_best.gff"])))
def _assign_merge_bam(self, args_circ):
    """Choose the folder of BAM files to merge and list its BAM files.

    When both ``normal_bams`` and ``frag_bams`` are given, every ``.bam``
    file of ``frag_bams`` is copied into ``normal_bams`` and its name is
    recorded so that the caller can delete the copy afterwards.

    Args:
        args_circ: Argument container providing ``normal_bams`` and
            ``frag_bams`` (each a folder path or ``None``).

    Returns:
        Tuple ``(merge_folder, remove_frags, bam_files)``: the chosen
        folder, the names of the copied fragment BAMs, and the paths of
        all ``.bam`` files found in ``merge_folder``.

    Raises:
        SystemExit: With status 1 when neither folder is assigned.
    """
    remove_frags = []
    normal_dir = args_circ.normal_bams
    frag_dir = args_circ.frag_bams
    if (normal_dir is not None) and (frag_dir is not None):
        for frag in os.listdir(frag_dir):
            if frag.endswith(".bam"):
                # NOTE(review): copyfile replaces a file of the same name
                # that already exists in normal_bams.
                shutil.copyfile(os.path.join(frag_dir, frag),
                                os.path.join(normal_dir, frag))
                remove_frags.append(frag)
        merge_folder = normal_dir
    elif normal_dir is not None:
        merge_folder = normal_dir
    elif frag_dir is not None:
        merge_folder = frag_dir
    else:
        print("Error: please assign bam folder or do alignment!!")
        # A bare sys.exit() reports status 0 (success); this is a failure.
        sys.exit(1)
    bam_files = [os.path.join(merge_folder, bam)
                 for bam in os.listdir(merge_folder)
                 if bam.endswith(".bam")]
    return merge_folder, remove_frags, bam_files
def run_circrna(self, args_circ):
    """Run the circular RNA detection workflow.

    The annotation GFFs are validated and parsed, reads are either
    aligned (``args_circ.align`` true) or existing BAM files are used,
    the alignment files are merged/sorted and passed to testrealign per
    strain, and finally the statistics and GFF files are generated and
    temporary data is removed.

    Args:
        args_circ: Argument container providing ``gffs``, ``fastas``,
            ``segemehl_path``, ``samtools_path``, ``align`` and, when
            aligning, ``read_folder``.

    Raises:
        SystemExit: With status 1 when ``segemehl_path`` is not assigned.
    """
    for gff in os.listdir(args_circ.gffs):
        if gff.endswith(".gff"):
            self.helper.check_uni_attributes(os.path.join(
                args_circ.gffs, gff))
    if args_circ.segemehl_path is None:
        print("Error: please assign segemehl folder!!")
        # A bare sys.exit() reports status 0 (success); this is a failure.
        sys.exit(1)
    self.multiparser.parser_gff(args_circ.gffs, None)
    self.multiparser.combine_gff(args_circ.fastas, self.gff_path,
                                 "fasta", None)
    tmp_reads = []
    # Both modes need the parsed fasta files, so parse them once up front.
    self.multiparser.parser_fasta(args_circ.fastas)
    if args_circ.align:
        tmp_reads = self._deal_zip_file(args_circ.read_folder)
        align_files, prefixs = self._align(args_circ)
    else:
        # NOTE(review): str.replace drops every ".fa" occurrence in the
        # name, not only the extension.
        prefixs = [fasta.replace(".fa", "")
                   for fasta in os.listdir(self.fasta_path)]
        merge_folder, remove_frag, bam_files = self._assign_merge_bam(
            args_circ)
        align_files = None
    for prefix in prefixs:
        if args_circ.align:
            sub_alignment_path = os.path.join(self.alignment_path, prefix)
            bam_files, convert_ones, remove_ones = self._convert_sam2bam(
                sub_alignment_path, args_circ.samtools_path, align_files)
        else:
            sub_alignment_path = merge_folder
            convert_ones = []
            remove_ones = []
        self._merge_sort_aligment_file(
            bam_files, args_circ.samtools_path, sub_alignment_path,
            convert_ones, tmp_reads, remove_ones)
        self._run_testrealign(prefix, args_circ.segemehl_path,
                              sub_alignment_path)
    tmp_prefixs = self._merge_bed(args_circ.fastas, self.splice_path)
    self.multiparser.parser_gff(args_circ.gffs, None)
    self.multiparser.combine_gff(args_circ.fastas, self.gff_path,
                                 "fasta", None)
    self._stat_and_gen_gff(tmp_prefixs, args_circ)
    self.helper.remove_tmp(args_circ.fastas)
    self.helper.remove_tmp(args_circ.gffs)
    for tmp_prefix in tmp_prefixs:
        shutil.rmtree(tmp_prefix)
    if not args_circ.align:
        # Delete the fragment BAMs that _assign_merge_bam copied into the
        # merge folder.
        for frag in remove_frag:
            os.remove(os.path.join(merge_folder, frag))
示例13: UTRDetection
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import remove_tmp [as 别名]
class UTRDetection(object):
    """Drive ``detect_5utr`` / ``detect_3utr`` for every annotation GFF.

    The instance keeps the ``tmp`` sub-folders of the TSS and transcript
    inputs and the ``5UTR`` / ``3UTR`` output folders (including their
    ``statistics`` sub-folders).
    """

    def __init__(self, args_utr):
        """Create the helpers and derive all working paths.

        Note that the paths are joined immediately, so ``args_utr.tsss``,
        ``args_utr.trans`` and ``args_utr.out_folder`` must already be
        valid path strings when the object is constructed.
        """
        self.helper = Helper()
        self.multiparser = Multiparser()
        self.tss_path = os.path.join(args_utr.tsss, "tmp")
        self.tran_path = os.path.join(args_utr.trans, "tmp")
        self.utr5_path = os.path.join(args_utr.out_folder, "5UTR")
        self.utr3_path = os.path.join(args_utr.out_folder, "3UTR")
        self.utr5_stat_path = os.path.join(self.utr5_path, "statistics")
        self.utr3_stat_path = os.path.join(self.utr3_path, "statistics")

    def _check_folder(self, folder):
        """Abort the program when a required input folder is ``None``.

        Raises:
            SystemExit: With status 1 when ``folder`` is ``None``.
        """
        if folder is None:
            print("Error: lack required files!!!")
            # A bare sys.exit() reports status 0 (success); this is a
            # failure.
            sys.exit(1)

    def _check_gff(self, folder):
        """Run ``check_uni_attributes`` on every ``.gff`` file in folder."""
        for gff in os.listdir(folder):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(folder, gff))

    def _compute_utr(self, args_utr):
        """Detect 5'UTRs and 3'UTRs for each ``.gff`` in ``args_utr.gffs``.

        For every annotation file the matching TSS, transcript and
        (only when ``args_utr.terms`` is set) terminator files are looked
        up, both detectors are run, and the length plots found in the
        current working directory are moved to the statistics folders.
        """
        for gff in os.listdir(args_utr.gffs):
            if not gff.endswith(".gff"):
                continue
            prefix = gff[:-4]
            annotation = os.path.join(args_utr.gffs, gff)
            tss = self.helper.get_correct_file(
                self.tss_path, "_TSS.gff", prefix, None, None)
            tran = self.helper.get_correct_file(
                self.tran_path, "_transcript.gff", prefix, None, None)
            if args_utr.terms:
                term = self.helper.get_correct_file(
                    os.path.join(args_utr.terms, "tmp"),
                    "_term.gff", prefix, None, None)
            else:
                term = None
            print("computing 5'UTR of {0} .....".format(prefix))
            detect_5utr(tss, annotation, tran,
                        os.path.join(self.utr5_path, "gffs",
                                     "_".join([prefix, "5UTR.gff"])),
                        args_utr)
            print("computing 3'UTR of {0} .....".format(prefix))
            detect_3utr(tran, annotation, term,
                        os.path.join(self.utr3_path, "gffs",
                                     "_".join([prefix, "3UTR.gff"])),
                        args_utr)
            self.helper.move_all_content(
                os.getcwd(), self.utr5_stat_path, ["_5utr_length.png"])
            self.helper.move_all_content(
                os.getcwd(), self.utr3_stat_path, ["_3utr_length.png"])

    def run_utr_detection(self, args_utr):
        """Validate the inputs, parse them, compute the UTRs and clean up.

        Args:
            args_utr: Argument container providing ``tsss``, ``gffs``,
                ``trans`` and the optional ``terms`` folder.
        """
        self._check_folder(args_utr.tsss)
        self._check_folder(args_utr.gffs)
        self._check_folder(args_utr.trans)
        self._check_gff(args_utr.tsss)
        self._check_gff(args_utr.gffs)
        self._check_gff(args_utr.trans)
        if args_utr.terms:
            # terms is optional; os.listdir(None) would list the current
            # working directory and check unrelated GFF files.
            self._check_gff(args_utr.terms)
        self.multiparser.parser_gff(args_utr.gffs, None)
        self.multiparser.parser_gff(args_utr.tsss, "TSS")
        self.multiparser.combine_gff(args_utr.gffs, self.tss_path,
                                     None, "TSS")
        self.multiparser.parser_gff(args_utr.trans, "transcript")
        self.multiparser.combine_gff(args_utr.gffs, self.tran_path,
                                     None, "transcript")
        if args_utr.terms:
            self.multiparser.parser_gff(args_utr.terms, "term")
            self.multiparser.combine_gff(args_utr.gffs,
                                         os.path.join(args_utr.terms, "tmp"),
                                         None, "term")
        self._compute_utr(args_utr)
        self.helper.remove_tmp(args_utr.gffs)
        self.helper.remove_tmp(args_utr.tsss)
        self.helper.remove_tmp(args_utr.trans)
        # NOTE(review): terms may be None here; presumably remove_tmp
        # tolerates that -- verify against Helper.remove_tmp.
        self.helper.remove_tmp(args_utr.terms)
        self.helper.remove_tmp(self.utr5_path)
        self.helper.remove_tmp(self.utr3_path)
示例14: TSSpredator
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import remove_tmp [as 别名]
#.........这里部分代码省略.........
self.helper.check_make_folder(os.path.join(os.getcwd(),
self.tmps["tmp"]))
for wig_file in os.listdir(wig_folder):
for lib in libs:
info = lib.split(":")
if (info[0][:-4] in wig_file) and (info[-1] == "+") and (
prefix in wig_file) and (
os.path.isfile(os.path.join(wig_folder, wig_file))):
Helper().merge_file(
os.path.join(wig_folder, wig_file),
os.path.join("tmp", "merge_forward.wig"))
if (info[0][:-4] in wig_file) and (info[-1] == "-") and (
prefix in wig_file) and (
os.path.isfile(os.path.join(wig_folder, wig_file))):
Helper().merge_file(
os.path.join(wig_folder, wig_file),
os.path.join("tmp", "merge_reverse.wig"))
def _check_orphan(self, prefixs, wig_folder, args_tss):
    """Run ``check_orphan`` on the predicted GFF of every strain.

    For each strain the wiggle files are merged through
    ``self._merge_wigs``, ``check_orphan`` writes its result into the
    temporary folder, and that result then replaces the GFF in
    ``self.gff_outfolder``. The ``tmp`` folder in the current working
    directory is deleted once all strains are done.

    Args:
        prefixs: Iterable of strain names.
        wig_folder: Folder with the wiggle files handed to _merge_wigs.
        args_tss: Argument container providing ``libs``, ``program``
            and ``gffs``.
    """
    for prefix in prefixs:
        self._merge_wigs(wig_folder, prefix, args_tss.libs)
        gff_name = "_".join([prefix, args_tss.program + ".gff"])
        tmp_tss = os.path.join(self.tmps["tmp"], gff_name)
        pre_tss = os.path.join(self.gff_outfolder, gff_name)
        annotation = os.path.join(args_tss.gffs, prefix + ".gff")
        check_orphan(pre_tss, annotation,
                     "tmp/merge_forward.wig", "tmp/merge_reverse.wig",
                     tmp_tss)
        shutil.move(tmp_tss, pre_tss)
    shutil.rmtree("tmp")
def _remove_files(self, args_tss):
    """Delete the temporary data left behind by the TSS prediction.

    ``remove_tmp`` is called for the fasta, GFF, wiggle and ``ta_files``
    folders, then the merged wiggle files are deleted from the current
    working directory when they exist there.

    Args:
        args_tss: Argument container providing ``fastas``, ``gffs``,
            ``wig_folder`` and ``ta_files``.
    """
    print("Remove temperary files and folders...")
    for attr in ("fastas", "gffs", "wig_folder", "ta_files"):
        self.helper.remove_tmp(getattr(args_tss, attr))
    for merged_wig in ("merge_forward.wig", "merge_reverse.wig"):
        if merged_wig in os.listdir(os.getcwd()):
            os.remove(merged_wig)
def _deal_with_overlap(self, out_folder, args_tss):
    """Filter predictions against the reference of the other feature.

    Nothing happens when ``args_tss.overlap_feature`` is "both"
    (case-insensitive). Otherwise every matching GFF in ``out_folder``
    is passed to ``filter_tss_pro`` together with its reference file:
    TSS files are compared with processing-site references and vice
    versa, depending on ``args_tss.program``.

    Args:
        out_folder: Folder containing the predicted GFF files.
        args_tss: Argument container providing ``overlap_feature``,
            ``program``, ``references`` and ``cluster``.
    """
    if args_tss.overlap_feature.lower() == "both":
        return
    print("Comparing TSS and Processing site...")
    # program -> (suffix of the files to filter, suffix of the reference)
    suffix_pairs = {
        "tss": ("_TSS.gff", "_processing.gff"),
        "processing_site": ("_processing.gff", "_TSS.gff"),
    }
    pair = suffix_pairs.get(args_tss.program.lower())
    if pair is None:
        return
    own_suffix, ref_suffix = pair
    for gff in os.listdir(out_folder):
        if not gff.endswith(own_suffix):
            continue
        ref = self.helper.get_correct_file(
            args_tss.references, ref_suffix,
            gff.replace(own_suffix, ""), None, None)
        filter_tss_pro(os.path.join(out_folder, gff), ref,
                       args_tss.overlap_feature, args_tss.cluster)
示例15: GoTermFinding
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import remove_tmp [as 别名]
#.........这里部分代码省略.........
self.stat_all_path = os.path.join(self.out_all, "statistics")
self.stat_express_path = os.path.join(self.out_express,
"statistics")
self.all_strain = "all_strains_uniprot.csv"
def _retrieve_go(self, uniprot, out_path, type_):
    """Call ``retrieve_uniprot`` for every file found in ``self.gff_path``.

    For each file an output folder named after the strain is prepared
    under ``out_path`` and the result is written to
    ``<strain>_uniprot.csv`` inside it. The matching transcript file is
    passed along only when ``self.tran_path`` is set.

    Args:
        uniprot: Forwarded unchanged to ``retrieve_uniprot``.
        out_path: Folder that receives one sub-folder per strain.
        type_: Forwarded unchanged to ``retrieve_uniprot`` (the visible
            callers pass "all" or "express").
    """
    # The original also collected the prefixes in a list that was never
    # read or returned; that dead local has been dropped.
    for gff in os.listdir(self.gff_path):
        prefix = gff.replace(".gff", "")
        self.helper.check_make_folder(os.path.join(out_path, prefix))
        out_file = os.path.join(out_path, prefix,
                                "_".join([prefix, "uniprot.csv"]))
        print("extracting Go terms of {0} from UniProt...".format(prefix))
        tran_file = None
        if self.tran_path is not None:
            tran_file = os.path.join(self.tran_path,
                                     "_".join([prefix, "transcript.gff"]))
        retrieve_uniprot(uniprot, os.path.join(self.gff_path, gff),
                         out_file, tran_file, type_)
def _merge_files(self, gffs, out_path, out_folder):
    """Combine the per-strain UniProt tables of each ``*gff_folder``.

    For every entry of ``gffs`` whose name ends with "gff_folder" a
    result folder is prepared under ``out_folder``. With several GFF
    files in the entry, their ``<name>_uniprot.csv`` tables are merged
    into ``self.all_strain`` and also copied next to it; with a single
    GFF file its table is copied to ``self.all_strain``. Afterwards
    ``out_path`` is emptied and the result folders are moved into it.

    Args:
        gffs: Folder containing the ``*gff_folder`` entries.
        out_path: Folder holding the per-strain tables; it ends up
            containing the merged result folders.
        out_folder: Folder in which the result folders are built.
    """
    folders = []
    for folder in os.listdir(gffs):
        if not folder.endswith("gff_folder"):
            continue
        filenames = [gff.replace(".gff", "")
                     for gff in os.listdir(os.path.join(gffs, folder))
                     if gff.endswith(".gff")]
        if not filenames:
            # Without this guard filenames[0] below raised IndexError for
            # an entry that contains no .gff file.
            print("Warning: no .gff file found in {0}, skipped".format(
                folder))
            continue
        folder_prefix = folder.replace(".gff_folder", "")
        folder_path = os.path.join(out_folder, folder_prefix)
        self.helper.check_make_folder(folder_path)
        folders.append(folder_path)
        out_all = os.path.join(folder_path, self.all_strain)
        if len(filenames) > 1:
            if self.all_strain in os.listdir(folder_path):
                os.remove(out_all)
            for filename in filenames:
                csv_file = "_".join([filename, "uniprot.csv"])
                source_csv = os.path.join(out_path, filename, csv_file)
                self.helper.merge_file(source_csv, out_all)
                shutil.copy(source_csv, folder_path)
        else:
            shutil.copyfile(
                os.path.join(out_path, filenames[0],
                             "_".join([filenames[0], "uniprot.csv"])),
                out_all)
    self.helper.remove_all_content(out_path, None, "dir")
    self.helper.remove_all_content(out_path, None, "file")
    for folder in folders:
        # os.path.basename honours the platform separator, unlike the
        # former split("/").
        shutil.move(folder,
                    os.path.join(out_path, os.path.basename(folder)))
def _stat(self, out_path, stat_path, go, goslim, out_folder):
    """Run ``map2goslim`` for every folder in ``out_path``.

    For each folder a statistics folder with a ``figs`` sub-folder is
    prepared under ``stat_path``, ``map2goslim`` is called on the
    folder's ``self.all_strain`` table, and the four kinds of PNG plots
    are moved from ``out_folder`` into ``figs``.

    Args:
        out_path: Folder whose entries are processed one by one.
        stat_path: Folder that receives one statistics folder per entry.
        go: Forwarded unchanged to ``map2goslim``.
        goslim: Forwarded unchanged to ``map2goslim``.
        out_folder: Folder handed to ``map2goslim`` and searched for the
            generated plots.
    """
    figure_suffixes = ("_three_roots.png", "_molecular_function.png",
                       "_cellular_component.png",
                       "_biological_process.png")
    for folder in os.listdir(out_path):
        strain_stat_path = os.path.join(stat_path, folder)
        self.helper.check_make_folder(strain_stat_path)
        fig_path = os.path.join(strain_stat_path, "figs")
        # The original tested for an entry named "fig" although the
        # folder created is "figs"; test the real target instead.
        if not os.path.isdir(fig_path):
            os.mkdir(fig_path)
        print("Computing statistics of {0}".format(folder))
        map2goslim(goslim, go,
                   os.path.join(out_path, folder, self.all_strain),
                   os.path.join(strain_stat_path,
                                "_".join(["stat", folder + ".csv"])),
                   out_folder)
        for suffix in figure_suffixes:
            self.helper.move_all_content(out_folder, fig_path, [suffix])
def run_go_term(self, args_go):
    """Run the GO term workflow for all CDSs and, optionally, expressed ones.

    The annotation GFFs are validated and parsed first. GO terms are
    then retrieved, merged and summarised for the "all" set; when
    ``args_go.trans`` is given the same three steps are repeated for
    the "express" set. Temporary folders are removed at the end.

    Args:
        args_go: Argument container providing ``gffs``, ``trans``
            (may be ``None``), ``uniprot``, ``go`` and ``goslim``.
    """
    gff_names = [name for name in os.listdir(args_go.gffs)
                 if name.endswith(".gff")]
    for name in gff_names:
        self.helper.check_uni_attributes(os.path.join(args_go.gffs, name))
    has_trans = args_go.trans is not None
    self.multiparser.parser_gff(args_go.gffs, None)
    if has_trans:
        self.multiparser.parser_gff(args_go.trans, "transcript")

    print("Computing all CDS...")
    self._retrieve_go(args_go.uniprot, self.result_all_path, "all")
    self._merge_files(args_go.gffs, self.result_all_path, self.out_all)
    self._stat(self.result_all_path, self.stat_all_path, args_go.go,
               args_go.goslim, self.out_all)

    if has_trans:
        print("Computing express CDS...")
        self._retrieve_go(args_go.uniprot, self.result_express_path,
                          "express")
        self._merge_files(args_go.gffs, self.result_express_path,
                          self.out_express)
        self._stat(self.result_express_path, self.stat_express_path,
                   args_go.go, args_go.goslim, self.out_express)

    self.helper.remove_tmp(args_go.gffs)
    if has_trans:
        self.helper.remove_tmp(args_go.trans)