This article collects typical usage examples of the Python method annogesiclib.helper.Helper.remove_tmp_dir. If you have been wondering what Helper.remove_tmp_dir does, how to call it, or simply want to see it used in practice, the curated code examples below should help. You can also look further into the containing class, annogesiclib.helper.Helper, for more usage examples.
The listing below shows 14 code examples of the Helper.remove_tmp_dir method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
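Before the listings, here is a minimal sketch of the cleanup pattern that repeats throughout the examples below. The argument object, its folder attributes, and the idea that each folder holds a temporary working copy are illustrative assumptions based on the examples, not a definitive reference:
# Minimal sketch of the common cleanup step (assumed, modeled on the examples below).
from annogesiclib.helper import Helper

def cleanup(args):
    helper = Helper()
    # Each input folder is assumed to contain a temporary working copy
    # (e.g. a "tmp" subfolder) created earlier in the pipeline;
    # remove_tmp_dir is called at the end of a module to discard it.
    helper.remove_tmp_dir(args.gffs)
    helper.remove_tmp_dir(args.fastas)
    helper.remove_tmp_dir(args.trans)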
Example 1: sORFDetection
# Required import: from annogesiclib.helper import Helper [as alias]
# Or: from annogesiclib.helper.Helper import remove_tmp_dir [as alias]
#......... part of the code is omitted here .........
srna_file, os.path.join(args_sorf.out_folder,
"_".join([prefix, "inter.gff"])), tss_file,
os.path.join(args_sorf.wig_path,
"_".join([prefix, "forward.wig"])),
os.path.join(args_sorf.wig_path,
"_".join([prefix, "reverse.wig"])),
os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF"])), args_sorf)
if "_".join([prefix, "sORF_all.gff"]) in os.listdir(
os.path.join(self.gff_output, self.all_cand)):
gff_all = os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF.gff"]))
gff_best = os.path.join(self.gff_output, self.best,
"_".join([prefix, "sORF.gff"]))
csv_all = os.path.join(self.table_output, self.all_cand,
"_".join([prefix, "sORF.csv"]))
csv_best = os.path.join(self.table_output, self.best,
"_".join([prefix, "sORF.csv"]))
shutil.move(os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF_all.gff"])), gff_all)
shutil.move(os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF_best.gff"])), gff_best)
shutil.move(os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF_all.csv"])), csv_all)
shutil.move(os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF_best.csv"])), csv_best)
log.write("\t" + gff_all + "\n")
log.write("\t" + gff_best + "\n")
log.write("\t" + csv_all + "\n")
log.write("\t" + csv_best + "\n")
def _remove_tmp(self, args_sorf):
self.helper.remove_all_content(args_sorf.out_folder, ".gff", "file")
self.helper.remove_tmp_dir(args_sorf.fastas)
self.helper.remove_tmp_dir(args_sorf.gffs)
self.helper.remove_tmp_dir(args_sorf.tsss)
self.helper.remove_tmp_dir(args_sorf.trans)
self.helper.remove_tmp_dir(args_sorf.srnas)
if "temp_wig" in os.listdir(args_sorf.out_folder):
shutil.rmtree(os.path.join(args_sorf.out_folder, "temp_wig"))
if "merge_wigs" in os.listdir(args_sorf.out_folder):
shutil.rmtree(os.path.join(args_sorf.out_folder, "merge_wigs"))
def _compare_tran_cds(self, args_sorf, log):
'''compare transcripts and CDSs to find intergenic regions'''
prefixs = []
log.write("Running sORF_intergenic.py to extract the sequences of "
"potential sORFs\n")
for gff in os.listdir(args_sorf.gffs):
if gff.endswith(".gff"):
prefix = gff.replace(".gff", "")
prefixs.append(prefix)
print("Comparing transcripts and CDSs of {0}".format(prefix))
get_intergenic(os.path.join(args_sorf.gffs, gff),
os.path.join(self.tran_path,
"_".join([prefix, "transcript.gff"])),
os.path.join(args_sorf.out_folder,
"_".join([prefix, "inter.gff"])),
args_sorf.utr_detect, args_sorf.hypo,
args_sorf.extend_5, args_sorf.extend_3)
log.write("\t" + os.path.join(args_sorf.out_folder,
"_".join([prefix, "inter.gff"])) +
" is generated to temporarily store the sequences.\n")
return prefixs
def _re_table(self, args_sorf, prefixs, log):
Example 2: Ribos
# Required import: from annogesiclib.helper import Helper [as alias]
# Or: from annogesiclib.helper.Helper import remove_tmp_dir [as alias]
#......... part of the code is omitted here .........
table_folder,
"_".join([prefix, suffixs["csv"]])))
shutil.copy(os.path.join(
tmp_files["scan"],
"_".join([entry.seq_id, suffixs["txt"]])),
os.path.join(scan_folder, prefix))
shutil.copy(os.path.join(
tmp_files["scan"],
"_".join([entry.seq_id, suffixs["re_txt"]])),
os.path.join(scan_folder, prefix))
pre_strain = entry.seq_id
log.write("The following files are generated.\n")
for folder in (table_folder, scan_folder):
for file_ in os.listdir(folder):
log.write("\t" + os.path.join(folder, file_) + "\n")
out_stat = os.path.join(
stat_folder,
"_".join(["stat", prefix, feature + ".txt"]))
print("Computing statistics of {0}".format(prefix))
log.write("Running ribo_gff.py to do statistics and generate "
"gff files for {0}.\n".format(prefix))
log.write("The following files are generated:\n")
out_gff = os.path.join(gff_outfolder, "_".join([
prefix, feature + ".gff"]))
stat_and_covert2gff(os.path.join(
table_folder, "_".join([prefix, suffixs["csv"]])),
feature_id, out_gff,
args_ribo.fuzzy, out_stat, feature)
log.write("\t" + out_gff + "\n")
log.write("\t" + out_stat + "\n")
fh.close()
def _remove_tmp(self, args_ribo):
self.helper.remove_tmp_dir(args_ribo.gffs)
self.helper.remove_tmp_dir(args_ribo.fastas)
self.helper.remove_tmp_dir(args_ribo.trans)
self.helper.remove_tmp_dir(args_ribo.tsss)
def _remove_overlap(self, gff_path, tmp_files, suffixs, type_, fuzzy, log):
log.write("Running rbs_overlap.py to remove the overlapping "
"riboswitches/RNA thermometers.\n")
for gff in os.listdir(gff_path):
if gff.endswith(".gff"):
tmp_table = os.path.join(os.path.join(
tmp_files["table"], "_".join([
gff.replace(".gff", ""), suffixs["csv"]])))
rbs_overlap(tmp_table,
os.path.join(gff_path, gff), type_, fuzzy)
log.write("\t" + tmp_table + " is updated.\n")
def _core_prediction(self, args_ribo, feature_id, rfam, tmp_files,
table_folder, feature, scan_folder, suffixs,
stat_folder, gff_outfolder, out_folder, type_, log):
'''main part of detection'''
log.write("Running get_Rfam_ribo.py to get the information of "
"riboswitches/RNA thermometers from Rfam.\n")
rbs_from_rfam(feature_id, args_ribo.rfam, rfam)
log.write("Using Infernal to compress the Rfam data of "
"riboswitches/RNA thermometers.\n")
log.write("Please make sure the version of Infernal is at least 1.1.1.\n")
print("Compressing Rfam of " + feature)
log.write(" ".join([args_ribo.cmpress_path, "-F", rfam]) + "\n")
call([args_ribo.cmpress_path, "-F", rfam])
log.write("Done!\n")
prefixs = []
self.helper.check_make_folder(tmp_files["fasta"])
Example 3: MEME
# Required import: from annogesiclib.helper import Helper [as alias]
# Or: from annogesiclib.helper.Helper import remove_tmp_dir [as alias]
#......... part of the code is omitted here .........
self.helper.merge_file(os.path.join(
self.tss_path, tss), self.all_tss)
for fasta in os.listdir(args_pro.fastas):
if (fasta.endswith(".fa")) or (
fasta.endswith(".fna")) or (
fasta.endswith(".fasta")):
self.helper.merge_file(os.path.join(
args_pro.fastas, fasta), self.all_fasta)
else:
for tss in os.listdir(os.path.join(
args_pro.output_folder, "TSS_classes")):
if tss.endswith("_TSS.gff"):
self.helper.merge_file(os.path.join(
self.tss_path, tss), self.all_tss)
for fasta in os.listdir(args_pro.fastas):
if (fasta.endswith(".fa")) or (
fasta.endswith(".fna")) or (
fasta.endswith(".fasta")):
self.helper.merge_file(os.path.join(
args_pro.fastas, fasta), self.all_fasta)
print("Generating fasta file of all sequences")
prefixs.append("allfasta")
input_path = os.path.join(self.out_fasta, "allfasta")
self.helper.check_make_folder(os.path.join(
args_pro.output_folder, "allfasta"))
self.helper.check_make_folder(os.path.join(
self.out_fasta, "allfasta"))
args_pro.source = True
upstream(self.all_tss, self.all_fasta, None,
None, args_pro, None)
self._move_and_merge_fasta(input_path, "allfasta")
def _remove_files(self, args_pro):
self.helper.remove_tmp_dir(args_pro.fastas)
self.helper.remove_tmp_dir(args_pro.tsss)
self.helper.remove_tmp_dir(args_pro.gffs)
if "tmp_wig" in os.listdir(args_pro.output_folder):
shutil.rmtree(os.path.join(args_pro.output_folder, "tmp_wig"))
if "allfasta" in os.listdir(os.getcwd()):
shutil.rmtree("allfasta")
if "tmp" in os.listdir(os.getcwd()):
shutil.rmtree("tmp")
def _gen_table(self, output_folder, prefixs, combine, program, log):
'''generate the promoter table'''
log.write("Running gen_promoter_table.py to generate a promoter "
"table, which is useful for sRNA prediction.\n")
log.write("The following files are generated:\n")
if combine:
strains = prefixs + ["allfasta"]
else:
strains = prefixs
for strain in strains:
tss_file = os.path.join(self.tss_path, strain + "_TSS.gff")
if (program.lower() == "both") or (
program.lower() == "meme"):
for folder in os.listdir(os.path.join(output_folder,
strain, "MEME")):
csv_file = os.path.join(output_folder, strain,
"MEME", folder, "meme.csv")
gen_promoter_table(os.path.join(output_folder, strain,
"MEME", folder, "meme.txt"),
csv_file, tss_file, "meme")
log.write("\t" + csv_file + "\n")
if (program.lower() == "both") or (
program.lower() == "glam2"):
Example 4: sRNATargetPrediction
# Required import: from annogesiclib.helper import Helper [as alias]
# Or: from annogesiclib.helper.Helper import remove_tmp_dir [as alias]
#......... part of the code is omitted here .........
checks[pre].append(line)
print_ = True
else:
if (num != 1):
if (line not in checks[srna_info]):
checks[srna_info].append(line)
print_ = True
else:
if (print_):
if (pre_num != len(checks)):
out_tmp.write(srna_info + "\n")
out_tmp.write(checks[srna_info][-1] + "\n")
out_tmp.write(line + "\n")
else:
if (not tar):
out_tmp.write(checks[srna_info][-1] + "\n")
out_tmp.write(line + "\n")
pre_num = len(checks)
tar = True
pre = line
num = num + 1
return num, pre_num, print_, pre, tar, srna_info
def _remove_intarna(self, line, checks, tar, srna_info, seq, out_tmp):
if (line.startswith(".")) or (
line.startswith("(")) or (
line.startswith(")")):
seq = line.split(";")[0]
if (seq not in checks[tar][srna_info]):
checks[tar][srna_info].append(seq)
out_tmp.write(line + "\n")
else:
if (len(line.split(";")) >= 8):
tar = line.split(";")[0]
srna_info = line.split(";")[3]
seq = line.split(";")[7]
if (tar not in checks):
checks[tar] = {}
checks[tar][srna_info] = [seq]
out_tmp.write(line + "\n")
else:
if (srna_info not in checks[tar]):
checks[tar][srna_info] = [seq]
out_tmp.write(line + "\n")
return tar, srna_info, seq
def _remove_repeat(self, interact_file, type_):
checks = {}
seq = ""
pre = ""
srna_info = ""
num = 1
tar = False
pre_num = 0
print_ = False
out_tmp = open(interact_file + "tmp", "w")
with open(interact_file) as fh:
for line in fh:
line = line.strip()
if (type_ == "RNAplex"):
num, pre_num, print_, pre = self._remove_rnaplex(
line, num, pre_num, pre, checks, out_tmp, print_)
elif (type_ == "RNAup"):
num, pre_num, print_, pre, tar, srna_info = (
self._remove_rnaup(
line, pre, num, pre_num,
srna_info, checks, out_tmp, print_, tar))
elif (type_ == "IntaRNA"):
tar, srna_info, seq = self._remove_intarna(
line, checks, tar, srna_info, seq, out_tmp)
out_tmp.close()
shutil.move(interact_file + "tmp", interact_file)
def run_srna_target_prediction(self, args_tar, log):
self._check_gff(args_tar.gffs)
self._check_gff(args_tar.srnas)
self.multiparser.parser_gff(args_tar.gffs, None)
self.multiparser.parser_fasta(args_tar.fastas)
self.multiparser.parser_gff(args_tar.srnas, "sRNA")
prefixs = []
self._gen_seq(prefixs, args_tar)
if ("RNAplex" in args_tar.program):
self._rna_plex(prefixs, args_tar, log)
self.helper.remove_all_content(self.target_seq_path,
"_target_", "file")
log.write("The temporary files for running RNAplex are deleted.\n")
if ("RNAup" in args_tar.program):
self._rnaup(prefixs, args_tar, log)
if ("IntaRNA" in args_tar.program):
self._intarna(prefixs, args_tar, log)
self._merge_rnaplex_rnaup(prefixs, args_tar, log)
self.helper.remove_all_content(args_tar.out_folder,
self.tmps["tmp"], "dir")
self.helper.remove_all_content(args_tar.out_folder,
self.tmps["tmp"], "file")
self.helper.remove_tmp_dir(args_tar.gffs)
self.helper.remove_tmp_dir(args_tar.srnas)
self.helper.remove_tmp_dir(args_tar.fastas)
self.helper.remove_all_content(self.srna_seq_path, "tmp_", "file")
Example 5: TranscriptDetection
# Required import: from annogesiclib.helper import Helper [as alias]
# Or: from annogesiclib.helper.Helper import remove_tmp_dir [as alias]
#......... part of the code is omitted here .........
fill_gap(os.path.join(args_tran.gffs, gff),
os.path.join(self.tran_path,
"_".join([ta, self.endfix_tran])),
"overlap", self.tmps["overlap"], args_tran.modify)
fill_gap(os.path.join(args_tran.gffs, gff),
os.path.join(self.tran_path,
"_".join([ta, self.endfix_tran])),
"uni", self.tmps["uni"], args_tran.modify)
tmp_merge = os.path.join(self.gff_outfolder, self.tmps["merge"])
if self.tmps["merge"] in self.gff_outfolder:
os.remove(tmp_merge)
self.helper.merge_file(self.tmps["overlap"], tmp_merge)
self.helper.merge_file(self.tmps["uni"], tmp_merge)
tmp_out = os.path.join(self.gff_outfolder, "_".join(["tmp", ta]))
self.helper.sort_gff(tmp_merge, tmp_out)
os.remove(self.tmps["overlap"])
os.remove(self.tmps["uni"])
os.remove(tmp_merge)
final_out = os.path.join(self.gff_outfolder,
"_".join(["final", ta]))
longer_ta(tmp_out, args_tran.length, final_out)
shutil.move(final_out,
os.path.join(self.tmps["tran"],
"_".join([ta, self.endfix_tran])))
os.remove(tmp_out)
shutil.rmtree(self.gff_outfolder)
shutil.move(self.tmps["tran"], self.gff_outfolder)
def _remove_file(self, args_tran):
if "tmp_wig" in os.listdir(args_tran.out_folder):
shutil.rmtree(os.path.join(args_tran.out_folder, "tmp_wig"))
if "merge_wigs" in os.listdir(args_tran.out_folder):
shutil.rmtree(os.path.join(args_tran.out_folder, "merge_wigs"))
self.helper.remove_tmp_dir(args_tran.gffs)
self.helper.remove_tmp_dir(args_tran.compare_tss)
self.helper.remove_tmp_dir(args_tran.terms)
self.helper.remove_tmp(os.path.join(args_tran.out_folder, "gffs"))
self.helper.remove_tmp(self.gff_outfolder)
def _compare_term_tran(self, args_tran, log):
'''search for terminators associated with transcripts'''
if args_tran.terms is not None:
print("Comparing between terminators and transcripts")
self.multiparser.parser_gff(args_tran.terms, "term")
if args_tran.gffs is not None:
self.multiparser.combine_gff(
args_tran.gffs,
os.path.join(args_tran.terms, "tmp"), None, "term")
log.write("Running compare_tran_term.py to compare transcripts "
"with terminators.\n")
compare_term_tran(self.gff_outfolder,
os.path.join(args_tran.terms, "tmp"),
args_tran.fuzzy_term, args_tran.fuzzy_term,
args_tran.out_folder, "transcript",
args_tran.terms, self.gff_outfolder)
for file_ in os.listdir(os.path.join(args_tran.out_folder, "statistics")):
if file_.startswith("stat_compare_transcript_terminator_"):
log.write("\t" + file_ + " is generated.\n")
def _re_table(self, args_tran, log):
log.write("Running re_table.py to generate coverage information.\n")
log.write("The following files are updated:\n")
for gff in os.listdir(self.gff_outfolder):
if os.path.isfile(os.path.join(self.gff_outfolder, gff)):
tran_table = os.path.join(args_tran.out_folder, "tables",
gff.replace(".gff", ".csv"))
Example 6: OperonDetection
# Required import: from annogesiclib.helper import Helper [as alias]
# Or: from annogesiclib.helper.Helper import remove_tmp_dir [as alias]
class OperonDetection(object):
'''detection of operon'''
def __init__(self, args_op):
self.multiparser = Multiparser()
self.helper = Helper()
if args_op.tsss is not None:
self.tss_path = os.path.join(args_op.tsss, "tmp")
else:
self.tss_path = None
self.tran_path = os.path.join(args_op.trans, "tmp")
self.table_path = os.path.join(args_op.output_folder, "tables")
if args_op.terms is not None:
self._check_gff(args_op.terms, "term")
self.term_path = os.path.join(args_op.terms, "tmp")
else:
self.term_path = None
def _check_gff(self, gffs, type_):
for gff in os.listdir(gffs):
if gff.endswith(".gff"):
self.helper.check_uni_attributes(os.path.join(gffs, gff))
def _detect_operon(self, prefixs, args_op, log):
log.write("Running detect_operon.py to detect operon.\n")
log.write("The following files are generated:\n")
for prefix in prefixs:
out_gff = os.path.join(args_op.output_folder, "gffs",
"_".join([prefix, "operon.gff"]))
out_table = os.path.join(self.table_path,
"_".join([prefix, "operon.csv"]))
print("Detecting operons of {0}".format(prefix))
if self.tss_path is None:
tss = False
else:
tss = self.helper.get_correct_file(
self.tss_path, "_TSS.gff", prefix, None, None)
tran = self.helper.get_correct_file(
self.tran_path, "_transcript.gff", prefix, None, None)
gff = self.helper.get_correct_file(
args_op.gffs, ".gff", prefix, None, None)
if self.term_path is None:
term = False
else:
term = self.helper.get_correct_file(
self.term_path, "_term.gff", prefix, None, None)
operon(tran, tss, gff, term, args_op.tss_fuzzy,
args_op.term_fuzzy, args_op.length, out_table, out_gff)
log.write("\t" + out_table + "\n")
log.write("\t" + out_gff + "\n")
def _check_and_parser_gff(self, args_op):
self._check_gff(args_op.gffs, "gff")
self._check_gff(args_op.trans, "tran")
self.multiparser.parser_gff(args_op.gffs, None)
self.multiparser.parser_gff(args_op.trans, "transcript")
self.multiparser.combine_gff(args_op.gffs, self.tran_path,
None, "transcript")
if args_op.tsss is not None:
self._check_gff(args_op.tsss, "tss")
self.multiparser.parser_gff(args_op.tsss, "TSS")
self.multiparser.combine_gff(args_op.gffs, self.tss_path, None, "TSS")
if args_op.terms is not None:
self._check_gff(args_op.terms, "term")
self.multiparser.parser_gff(args_op.terms, "term")
self.multiparser.combine_gff(args_op.gffs, self.term_path,
None, "term")
def _stat(self, table_path, stat_folder, log):
log.write("Running stat_operon.py to do statistics.\n")
for table in os.listdir(table_path):
if table.endswith("_operon.csv"):
filename = "_".join(["stat", table])
out_stat = os.path.join(stat_folder, filename)
stat(os.path.join(table_path, table), out_stat)
log.write("\t" + out_stat + "\n")
def run_operon(self, args_op, log):
self._check_and_parser_gff(args_op)
prefixs = []
for gff in os.listdir(args_op.gffs):
if gff.endswith(".gff"):
prefixs.append(gff.replace(".gff", ""))
self._detect_operon(prefixs, args_op, log)
self._stat(self.table_path, args_op.stat_folder, log)
self.helper.remove_tmp_dir(args_op.gffs)
self.helper.remove_tmp_dir(args_op.tsss)
self.helper.remove_tmp_dir(args_op.trans)
if args_op.terms is not None:
self.helper.remove_tmp_dir(args_op.terms)
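Example 6 is the only listing shown in full, so a hypothetical driver is sketched below, assuming the class above and its annogesiclib dependencies are importable. The attribute names on args_op are taken from the class itself, but the concrete paths, fuzzy values, and length are illustrative assumptions only:
# Hypothetical driver for OperonDetection (illustrative values, not from the source).
import argparse

args_op = argparse.Namespace(
    gffs="input/gffs",                      # annotation GFF folder (assumed layout)
    trans="input/transcripts",              # transcript GFF folder
    tsss=None,                              # optional TSS folder
    terms=None,                             # optional terminator folder
    output_folder="output/operons",
    stat_folder="output/operons/statistics",
    tss_fuzzy=5, term_fuzzy=30, length=20,  # illustrative thresholds
)

detector = OperonDetection(args_op)
# run_operon expects a log object with a write() method; a plain file handle works.
detector.run_operon(args_op, log=open("operon.log", "w"))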
Example 7: GoTermFinding
# Required import: from annogesiclib.helper import Helper [as alias]
# Or: from annogesiclib.helper.Helper import remove_tmp_dir [as alias]
#......... part of the code is omitted here .........
"Protein_id", "Go_term"]) + "\n")
for row in csv.reader(fh, delimiter='\t'):
if row[0] != "Genome":
out.write("\t".join(row) + "\n")
out.close()
fh.close()
shutil.move(out_all + "_tmp", out_all)
def _merge_files(self, gffs, out_path, out_folder, log):
'''merge the files according to the input genome folder'''
folders = []
log.write("Merging the output files based on the input genome "
"information.\n")
for folder in os.listdir(gffs):
if folder.endswith("gff_folder"):
folder_prefix = folder.replace(".gff_folder", "")
folder_path = os.path.join(out_folder, folder_prefix)
self.helper.check_make_folder(folder_path)
folders.append(folder_path)
filenames = []
for gff in os.listdir(os.path.join(gffs, folder)):
if gff.endswith(".gff"):
filenames.append(gff.replace(".gff", ""))
out_all = os.path.join(folder_path, self.all_strain)
if len(filenames) > 1:
if self.all_strain in os.listdir(folder_path):
os.remove(out_all)
for filename in filenames:
csv_file = "_".join([filename, "uniprot.csv"])
self.helper.merge_file(os.path.join(out_path,
filename, csv_file), out_all)
self._remove_header(out_all)
shutil.copy(os.path.join(out_path, filename, csv_file),
folder_path)
else:
shutil.copyfile(os.path.join(out_path, filenames[0],
"_".join([filenames[0], "uniprot.csv"])),
out_all)
self.helper.remove_all_content(out_path, None, "dir")
self.helper.remove_all_content(out_path, None, "file")
for folder in folders:
folder_prefix = folder.split("/")[-1]
shutil.move(folder, os.path.join(out_path, folder_prefix))
for file_ in os.listdir(os.path.join(out_path, folder_prefix)):
log.write("\t" + os.path.join(out_path, folder_prefix, file_) +
" is generated.\n")
def _stat(self, out_path, stat_path, go, goslim, out_folder, log):
log.write("Running gene_ontology.py to retrieve GOslim terms and "
"do statistics.\n")
log.write("The following files are generated:\n")
for folder in os.listdir(out_path):
strain_stat_path = os.path.join(stat_path, folder)
self.helper.check_make_folder(strain_stat_path)
fig_path = os.path.join(strain_stat_path, "figs")
if "fig" not in os.listdir(strain_stat_path):
os.mkdir(fig_path)
stat_file = os.path.join(strain_stat_path,
"_".join(["stat", folder + ".csv"]))
map2goslim(goslim, go,
os.path.join(out_path, folder, self.all_strain),
stat_file, out_folder)
log.write("\t" + stat_file + "\n")
self.helper.move_all_content(out_folder, fig_path,
["_three_roots.png"])
self.helper.move_all_content(out_folder, fig_path,
["_molecular_function.png"])
self.helper.move_all_content(out_folder, fig_path,
["_cellular_component.png"])
self.helper.move_all_content(out_folder, fig_path,
["_biological_process.png"])
for file_ in os.listdir(fig_path):
log.write("\t" + os.path.join(fig_path, file_) + "\n")
def run_go_term(self, args_go, log):
for gff in os.listdir(args_go.gffs):
if gff.endswith(".gff"):
self.helper.check_uni_attributes(os.path.join(
args_go.gffs, gff))
self.multiparser.parser_gff(args_go.gffs, None)
if args_go.trans is not None:
self.multiparser.parser_gff(args_go.trans, "transcript")
print("Computing all CDSs")
log.write("Retrieving GO terms for all CDSs.\n")
self._retrieve_go(args_go.uniprot, self.result_all_path, "all", log)
self._merge_files(args_go.gffs, self.result_all_path, self.out_all, log)
self._stat(self.result_all_path, self.stat_all_path, args_go.go,
args_go.goslim, self.out_all, log)
if args_go.trans is not None:
log.write("Retrieving GO terms only for expressed CDSs.\n")
print("Computing expressed CDSs")
self._retrieve_go(args_go.uniprot, self.result_express_path,
"express", log)
self._merge_files(args_go.gffs, self.result_express_path,
self.out_express, log)
self._stat(self.result_express_path, self.stat_express_path,
args_go.go, args_go.goslim, self.out_express, log)
self.helper.remove_tmp_dir(args_go.gffs)
if args_go.trans is not None:
self.helper.remove_tmp_dir(args_go.trans)
Example 8: SubLocal
# Required import: from annogesiclib.helper import Helper [as alias]
# Or: from annogesiclib.helper.Helper import remove_tmp_dir [as alias]
#......... part of the code is omitted here .........
tmp_psortb_path, "_".join([prefix, self.endfix_raw])),
os.path.join(tmp_psortb_path, "_".join([
prefix, self.endfix_table])),
None, None, args_sub.fuzzy)
log.write("\t" + os.path.join(tmp_psortb_path, "_".join([
prefix, self.endfix_table])) + " is temporarily generated.\n")
def _remove_header(self, out_all):
out = open(out_all + "_tmp", "w")
fh = open(out_all, "r")
out.write("\t".join(["#Genome", "Protein", "Strand", "Start",
"End", "Location", "Score"]) + "\n")
for row in csv.reader(fh, delimiter='\t'):
if row[0] != "#Genome":
out.write("\t".join(row) + "\n")
out.close()
fh.close()
shutil.move(out_all + "_tmp", out_all)
def _merge_and_stat(self, gffs, tmp_psortb_path, stat_path, psortb_result,
log):
for folder in os.listdir(gffs):
if folder.endswith(".gff_folder"):
prefix = folder.replace(".gff_folder", "")
self.helper.check_make_folder(
os.path.join(psortb_result, prefix))
merge_table = os.path.join(
psortb_result, prefix,
"_".join([prefix, self.endfix_table]))
for gff in os.listdir(os.path.join(gffs, folder)):
result = self.helper.get_correct_file(
tmp_psortb_path, "_" + self.endfix_raw,
gff.replace(".gff", ""), None, None)
shutil.copy(result, os.path.join(psortb_result, prefix))
result = self.helper.get_correct_file(
tmp_psortb_path, "_" + self.endfix_table,
gff.replace(".gff", ""), None, None)
self.helper.merge_file(result, merge_table)
log.write("\t" + merge_table + "\n")
self._remove_header(merge_table)
self.helper.check_make_folder(os.path.join(stat_path, prefix))
stat_folder = os.path.join(stat_path, prefix)
stat_file = os.path.join(stat_folder, "_".join([
"stat", prefix, "sublocal.csv"]))
stat_sublocal(merge_table,
os.path.join(stat_folder, prefix),
stat_file)
for file_ in os.listdir(stat_folder):
log.write("\t" + os.path.join(stat_folder, file_) + "\n")
def _remove_tmps(self, args_sub):
self.helper.remove_tmp_dir(args_sub.fastas)
self.helper.remove_tmp_dir(args_sub.gffs)
self.helper.remove_all_content(args_sub.out_folder, "tmp", "dir")
self.helper.remove_all_content(self.out_all, "tmp", "dir")
self.helper.remove_all_content(self.out_express, "tmp", "dir")
os.remove(os.path.join(self.out_all, "tmp_log"))
if args_sub.trans is not None:
os.remove(os.path.join(self.out_express, "tmp_log"))
self.helper.remove_tmp_dir(args_sub.trans)
def run_sub_local(self, args_sub, log):
for gff in os.listdir(args_sub.gffs):
if gff.endswith(".gff"):
self.helper.check_uni_attributes(os.path.join(
args_sub.gffs, gff))
self.multiparser.parser_gff(args_sub.gffs, None)
self.multiparser.parser_fasta(args_sub.fastas)
if args_sub.trans is not None:
self.multiparser.parser_gff(args_sub.trans, "transcript")
self.helper.check_make_folder(self.express_tmp_path)
self.helper.check_make_folder(self.express_tmp_result)
self.helper.check_make_folder(self.all_tmp_path)
self.helper.check_make_folder(self.all_tmp_result)
for gff in os.listdir(self.gff_path):
if args_sub.trans is not None:
print("Running expressed genes now")
prefix = self._get_protein_seq(gff, self.express_tmp_path,
self.tran_path, args_sub, log)
self._run_psortb(args_sub, prefix, self.out_express,
self.express_tmp_path,
self.express_tmp_result, log)
self._extract_result(args_sub, self.express_tmp_result, prefix,
os.path.join(self.gff_path, gff), log)
print("Running all genes now")
prefix = self._get_protein_seq(gff, self.all_tmp_path, None,
args_sub, log)
self._run_psortb(args_sub, prefix, self.out_all,
self.all_tmp_path, self.all_tmp_result, log)
self._extract_result(args_sub, self.all_tmp_result, prefix,
os.path.join(self.gff_path, gff), log)
log.write("Running stat_sublocal.py to do statistics, generate "
"merged tables, and plot figures.\n")
log.write("The following files are generated:\n")
self._merge_and_stat(args_sub.gffs, self.all_tmp_result,
self.all_stat_path, self.all_result, log)
if args_sub.trans is not None:
self._merge_and_stat(args_sub.gffs, self.express_tmp_result,
self.express_stat_path, self.express_result, log)
self._remove_tmps(args_sub)
Example 9: Terminator
# Required import: from annogesiclib.helper import Helper [as alias]
# Or: from annogesiclib.helper.Helper import remove_tmp_dir [as alias]
#......... part of the code is omitted here .........
self._run_rnafold(args_term.RNAfold_path, tmp_seq, tmp_sec,
prefix, log)
log.write("Running extract_sec_info.py to extract the "
"information of secondary structure from {0}.\n".format(
prefix))
extract_info_sec(tmp_sec, tmp_seq, tmp_index)
os.remove(tmp_index)
log.write("Running get_polyT.py to detect the "
"terminator candidates for {0}.\n".format(prefix))
poly_t(tmp_seq, tmp_sec, gff_file, tran_file, tmp_cand, args_term)
log.write("\t" + tmp_cand + " which temporarily stores terminator "
"candidates is generated.\n")
print("Detecting terminators for " + prefix)
log.write("Running detect_coverage_term.py to gain "
"high-confidence terminators for {0}.\n".format(prefix))
detect_coverage(
tmp_cand, os.path.join(merge_path, prefix + ".gff"),
os.path.join(self.tran_path, "_".join([
prefix, "transcript.gff"])),
os.path.join(self.fasta_path, prefix + ".fa"),
os.path.join(wig_path, "_".join([prefix, "forward.wig"])),
os.path.join(wig_path, "_".join([prefix, "reverse.wig"])),
os.path.join(self.tmps["hp_path"], "_".join([
prefix, self.tmps["hp_gff"]])), merge_wigs,
os.path.join(self.outfolder["term"], "_".join([
prefix, self.suffixs["gff"]])),
os.path.join(self.tmps["term_table"], "_".join([
prefix, "term_raw.csv"])), args_term)
self.multiparser.combine_gff(args_term.gffs, self.outfolder["term"],
None, "term")
self._move_file(self.outfolder["term"], self.outfolder["csv"])
def _remove_tmp_file(self, merge_wigs, args_term):
self.helper.remove_tmp_dir(args_term.gffs)
self.helper.remove_tmp_dir(args_term.fastas)
if args_term.srnas is not None:
self.helper.remove_tmp(args_term.srnas)
shutil.rmtree(self.tmps["merge"])
if (args_term.tex_wigs is not None) and (
args_term.frag_wigs is not None):
shutil.rmtree(merge_wigs)
self.helper.remove_tmp_dir(args_term.trans)
if "tmp_wig" in os.listdir(args_term.out_folder):
shutil.rmtree(os.path.join(args_term.out_folder, "tmp_wig"))
self.helper.remove_tmp(self.outfolder["term"])
shutil.rmtree(self.tmps["transterm"])
shutil.rmtree(self.tmps["term_table"])
self.helper.remove_all_content(args_term.out_folder,
"inter_seq_", "file")
self.helper.remove_all_content(self.outfolder["term"],
"_term.gff", "file")
self.helper.remove_all_content(args_term.out_folder,
"inter_sec_", "file")
self.helper.remove_all_content(args_term.out_folder,
"term_candidates_", "file")
def _compute_stat(self, args_term, log):
new_prefixs = []
for gff in os.listdir(self.terms["all"]):
if gff.endswith("_term_all.gff"):
out_tmp = open(self.tmps["gff"], "w")
out_tmp.write("##gff-version 3\n")
new_prefix = gff.replace("_term_all.gff", "")
new_prefixs.append(gff.replace("_term_all.gff", ""))
num = 0
fh = open(os.path.join(self.terms["all"], gff))
Example 10: RATT
# Required import: from annogesiclib.helper import Helper [as alias]
# Or: from annogesiclib.helper.Helper import remove_tmp_dir [as alias]
class RATT(object):
'''annotation transfer'''
def __init__(self, args_ratt):
self.multiparser = Multiparser()
self.converter = Converter()
self.format_fixer = FormatFixer()
self.helper = Helper()
if args_ratt.ref_gbk:
self.gbk = os.path.join(args_ratt.ref_gbk, "gbk_tmp")
self.gbk_tmp = os.path.join(self.gbk, "tmp")
self.embl = os.path.join(args_ratt.ref_gbk, "embls")
if args_ratt.ref_embls:
self.embl = args_ratt.ref_embls
self.ratt_log = os.path.join(args_ratt.output_path, "ratt_log.txt")
self.tmp_files = {"tar": os.path.join(args_ratt.tar_fastas, "tmp"),
"ref": os.path.join(args_ratt.ref_fastas, "tmp"),
"out_gff": os.path.join(args_ratt.gff_outfolder,
"tmp"),
"gff": os.path.join(args_ratt.gff_outfolder,
"tmp.gff"),
"ptt": os.path.join(args_ratt.gff_outfolder,
"tmp.ptt"),
"rnt": os.path.join(args_ratt.gff_outfolder,
"tmp.rnt")}
def _convert_to_pttrnt(self, gffs, files, log):
for gff in files:
if gff.endswith(".gff"):
gff = os.path.join(gffs, gff)
filename = gff.split("/")
prefix = filename[-1][:-4]
rnt = gff[:-3] + "rnt"
ptt = gff[:-3] + "ptt"
fasta = self.helper.get_correct_file(self.tmp_files["tar"],
".fa", prefix, None, None)
if fasta:
self.converter.convert_gff2rntptt(gff, fasta, ptt, rnt,
None, None)
log.write("\t" + ptt + " is generated.\n")
log.write("\t" + rnt + " is generated.\n")
def _remove_files(self, args_ratt, out_gbk, log):
self.helper.remove_all_content(args_ratt.gff_outfolder, ".gff", "file")
self.helper.remove_all_content(args_ratt.gff_outfolder, ".ptt", "file")
self.helper.remove_all_content(args_ratt.gff_outfolder, ".rnt", "file")
log.write("Moving the final output files to {0}.\n".format(args_ratt.gff_outfolder))
self.helper.move_all_content(self.tmp_files["out_gff"],
args_ratt.gff_outfolder, None)
log.write("Removing the temporary files.\n")
shutil.rmtree(self.tmp_files["out_gff"])
shutil.rmtree(self.tmp_files["tar"])
shutil.rmtree(self.tmp_files["ref"])
self.helper.remove_tmp_dir(args_ratt.tar_fastas)
self.helper.remove_tmp_dir(args_ratt.ref_fastas)
self.helper.remove_tmp_dir(args_ratt.ref_embls)
self.helper.remove_tmp_dir(args_ratt.ref_gbk)
def _convert_to_gff(self, ratt_result, args_ratt, files, log):
name = ratt_result.split(".")
filename = ".".join(name[1:-2]) + ".gff"
output_file = os.path.join(args_ratt.output_path, filename)
self.converter.convert_embl2gff(
os.path.join(args_ratt.output_path, ratt_result), output_file)
self.format_fixer.fix_ratt(output_file, ".".join(name[1:-2]),
"tmp_gff")
shutil.move("tmp_gff", output_file)
shutil.copy(output_file, os.path.join(args_ratt.gff_outfolder,
filename))
log.write("\t" + os.path.join(args_ratt.gff_outfolder, filename) +
" is generated.\n")
files.append(filename)
def _parser_embl_gbk(self, files):
self.helper.check_make_folder(self.gbk)
for file_ in files:
close = False
with open(file_, "r") as f_h:
for line in f_h:
if (line.startswith("LOCUS")):
out = open(self.gbk_tmp, "w")
datas = line.split(" ")
for data in datas:
if (len(data) != 0) and (data != "LOCUS"):
filename = ".".join([data.strip(), "gbk"])
break
elif (line.startswith("VERSION")):
datas = line.split(" ")
for data in datas:
if (len(data) != 0) and (data != "VERSION"):
new_filename = ".".join([data.strip(), "gbk"])
break
if new_filename.find(filename):
filename = new_filename
if out:
out.write(line)
if line.startswith("//"):
out.close()
close = True
shutil.move(self.gbk_tmp,
#......... part of the code is omitted here .........
Example 11: TSSpredator
# Required import: from annogesiclib.helper import Helper [as alias]
# Or: from annogesiclib.helper.Helper import remove_tmp_dir [as alias]
#......... part of the code is omitted here .........
self.tmps["tmp"]))
for wig_file in os.listdir(wig_folder):
for lib in libs:
info = lib.split(":")
if (info[0][:-4] in wig_file) and (info[-1] == "+") and (
prefix in wig_file) and (
os.path.isfile(os.path.join(wig_folder, wig_file))):
Helper().merge_file(
os.path.join(wig_folder, wig_file),
os.path.join("tmp", "merge_forward.wig"))
if (info[0][:-4] in wig_file) and (info[-1] == "-") and (
prefix in wig_file) and (
os.path.isfile(os.path.join(wig_folder, wig_file))):
Helper().merge_file(
os.path.join(wig_folder, wig_file),
os.path.join("tmp", "merge_reverse.wig"))
def _check_orphan(self, prefixs, wig_folder, args_tss):
'''if the genome has no locus tag, this can be used for classifying the TSSs'''
for prefix in prefixs:
self._merge_wigs(wig_folder, prefix, args_tss.libs)
tmp_tss = os.path.join(self.tmps["tmp"], "_".join([
prefix, args_tss.program + ".gff"]))
pre_tss = os.path.join(self.gff_outfolder, "_".join([
prefix, args_tss.program + ".gff"]))
check_orphan(pre_tss, os.path.join(
args_tss.gffs, prefix + ".gff"),
"tmp/merge_forward.wig", "tmp/merge_reverse.wig", tmp_tss)
shutil.move(tmp_tss, pre_tss)
shutil.rmtree("tmp")
def _remove_files(self, args_tss):
print("Remove temporary files and folders")
self.helper.remove_tmp_dir(args_tss.fastas)
self.helper.remove_tmp_dir(args_tss.gffs)
self.helper.remove_tmp_dir(args_tss.ta_files)
if "merge_forward.wig" in os.listdir(os.getcwd()):
os.remove("merge_forward.wig")
if "merge_reverse.wig" in os.listdir(os.getcwd()):
os.remove("merge_reverse.wig")
shutil.rmtree(args_tss.wig_folder)
if args_tss.manual is not None:
shutil.rmtree(args_tss.manual)
def _deal_with_overlap(self, out_folder, args_tss):
'''deal with the situation where a TSS and a
processing site are at the same position'''
if not args_tss.overlap_feature:
pass
else:
print("Comparing TSSs and Processing sites")
if args_tss.program.lower() == "tss":
for tss in os.listdir(out_folder):
if tss.endswith("_TSS.gff"):
ref = self.helper.get_correct_file(
args_tss.overlap_gffs, "_processing.gff",
tss.replace("_TSS.gff", ""), None, None)
filter_tss_pro(os.path.join(out_folder, tss),
ref, args_tss.program,
args_tss.cluster)
elif args_tss.program.lower() == "processing":
for tss in os.listdir(out_folder):
if tss.endswith("_processing.gff"):
ref = self.helper.get_correct_file(
args_tss.overlap_gffs, "_TSS.gff",
tss.replace("_processing.gff", ""), None, None)
Example 12: UTRDetection
# Required import: from annogesiclib.helper import Helper [as alias]
# Or: from annogesiclib.helper.Helper import remove_tmp_dir [as alias]
class UTRDetection(object):
'''detection of UTR'''
def __init__(self, args_utr):
self.helper = Helper()
self.multiparser = Multiparser()
self.tss_path = os.path.join(args_utr.tsss, "tmp")
self.tran_path = os.path.join(args_utr.trans, "tmp")
self.utr5_path = os.path.join(args_utr.out_folder, "5UTRs")
self.utr3_path = os.path.join(args_utr.out_folder, "3UTRs")
self.utr5_stat_path = os.path.join(self.utr5_path, "statistics")
self.utr3_stat_path = os.path.join(self.utr3_path, "statistics")
def _check_folder(self, folder):
if folder is None:
print("Error: Required files are missing!")
sys.exit()
def _check_gff(self, folder):
for gff in os.listdir(folder):
if gff.endswith(".gff"):
self.helper.check_uni_attributes(os.path.join(folder, gff))
def _compute_utr(self, args_utr, log):
log.write("Running detect_utr.py to detect UTRs.\n")
for gff in os.listdir(args_utr.gffs):
if gff.endswith(".gff"):
prefix = gff[:-4]
tss = self.helper.get_correct_file(
self.tss_path, "_TSS.gff", prefix, None, None)
tran = self.helper.get_correct_file(
self.tran_path, "_transcript.gff", prefix, None, None)
if args_utr.terms:
term = self.helper.get_correct_file(
os.path.join(args_utr.terms, "tmp"),
"_term.gff", prefix, None, None)
else:
term = None
print("Computing 5'UTRs of {0}".format(prefix))
detect_5utr(tss, os.path.join(args_utr.gffs, gff),
tran, os.path.join(self.utr5_path, "gffs",
"_".join([prefix, "5UTR.gff"])), args_utr)
print("Computing 3'UTRs of {0}".format(prefix))
detect_3utr(tran, os.path.join(args_utr.gffs, gff),
term, os.path.join(self.utr3_path, "gffs",
"_".join([prefix, "3UTR.gff"])), args_utr)
self.helper.move_all_content(
os.getcwd(), self.utr5_stat_path, ["_5utr_length.png"])
self.helper.move_all_content(
os.getcwd(), self.utr3_stat_path, ["_3utr_length.png"])
log.write("The following files are generated:\n")
for folder in (os.path.join(self.utr5_path, "gffs"),
os.path.join(self.utr3_path, "gffs"),
self.utr5_stat_path, self.utr3_stat_path):
for file_ in os.listdir(folder):
log.write("\t" + os.path.join(folder, file_) + "\n")
def run_utr_detection(self, args_utr, log):
self._check_folder(args_utr.tsss)
self._check_folder(args_utr.gffs)
self._check_folder(args_utr.trans)
self._check_gff(args_utr.tsss)
self._check_gff(args_utr.gffs)
self._check_gff(args_utr.trans)
self._check_gff(args_utr.terms)
self.multiparser.parser_gff(args_utr.gffs, None)
self.multiparser.parser_gff(args_utr.tsss, "TSS")
self.multiparser.combine_gff(args_utr.gffs, self.tss_path, None, "TSS")
self.multiparser.parser_gff(args_utr.trans, "transcript")
self.multiparser.combine_gff(args_utr.gffs, self.tran_path,
None, "transcript")
if args_utr.terms:
self.multiparser.parser_gff(args_utr.terms, "term")
self.multiparser.combine_gff(args_utr.gffs,
os.path.join(args_utr.terms, "tmp"),
None, "term")
self._compute_utr(args_utr, log)
self.helper.remove_tmp_dir(args_utr.gffs)
self.helper.remove_tmp_dir(args_utr.tsss)
self.helper.remove_tmp_dir(args_utr.trans)
self.helper.remove_tmp_dir(args_utr.terms)
self.helper.remove_tmp(self.utr5_path)
self.helper.remove_tmp(self.utr3_path)
Example 13: SNPCalling
# Required import: from annogesiclib.helper import Helper [as alias]
# Or: from annogesiclib.helper.Helper import remove_tmp_dir [as alias]
#......... part of the code is omitted here .........
for fasta in os.listdir(fastas):
if fasta.endswith("fasta") or \
fasta.endswith("fa") or \
fasta.endswith("fna"):
self.seq_editer.modify_header(os.path.join(fastas, fasta))
def _get_header(self, samtools_path, bam, seq_names):
command = " ".join([samtools_path, "view", "-H", bam])
os.system(">".join([command, self.header]))
fh = open(self.header, "r")
for row in csv.reader(fh, delimiter="\t"):
if row[0] == "@SQ":
if row[1].split(":")[1] not in seq_names:
seq_names.append(row[1].split(":")[1])
fh.close()
def _get_genome_name(self, args_snp, bam_datas):
seq_names = []
for bam in bam_datas:
bam_file = os.path.join(args_snp.out_folder,
bam["sample"] + ".bam")
self._get_header(args_snp.samtools_path,
bam_file, seq_names)
return seq_names
def _remove_bams(self, bam_datas, args_snp):
for bam in bam_datas:
bam_file = os.path.join(args_snp.out_folder,
bam["sample"] + ".bam")
if os.path.exists(bam_file):
os.remove(bam_file)
if os.path.exists(bam_file + ".bai"):
os.remove(bam_file + ".bai")
if os.path.exists(self.header):
os.remove(self.header)
os.remove(self.outputs["depth"] + bam["sample"])
def _extract_bams(self, bams, log):
bam_datas = []
for bam in bams:
datas = bam.split(":")
if len(datas) != 2:
log.write("the format of --bam_files is wrong!\n")
print("Error: the format of --bam_files is wrong!")
sys.exit()
for file_ in datas[-1].split(","):
if not os.path.exists(file_):
print("Error: there are some BAM files "
"which do not exist!")
log.write(file_ + " is not found.\n")
sys.exit()
bam_datas.append({"sample": datas[0],
"rep": len(datas[-1].split(",")),
"bams": datas[-1].split(",")})
return bam_datas
def _merge_fasta(self, fastas, log):
all_fasta = os.path.join(fastas, "all.fa")
names = []
out = open(all_fasta, "w")
print_ = False
for fasta in os.listdir(fastas):
if (fasta.endswith(".fa")) or (
fasta.endswith(".fasta")) or (
fasta.endswith(".fna")):
with open(os.path.join(fastas, fasta)) as fh:
for line in fh:
line = line.strip()
if line.startswith(">"):
if line not in names:
print_ = True
names.append(line)
else:
print_ = False
if print_:
out.write(line + "\n")
log.write(os.path.join(fastas, fasta) + " is loaded.\n")
out.close()
return all_fasta
def run_snp_calling(self, args_snp, log):
self._modify_header(args_snp.fastas)
all_fasta = self._merge_fasta(args_snp.fastas, log)
bam_datas = self._extract_bams(args_snp.bams, log)
self._merge_bams(args_snp, bam_datas, log)
if ("with_BAQ" not in args_snp.program) and (
"without_BAQ" not in args_snp.program) and (
"extend_BAQ" not in args_snp.program):
print("Error: Please assign a correct program: "
"\"with_BAQ\", \"without_BAQ\", \"extend_BAQ\".")
sys.exit()
else:
print("Detecting mutations now")
self._run_program(all_fasta, bam_datas, args_snp, log)
os.remove(self.outputs["tmp"])
os.remove(all_fasta)
os.remove(all_fasta + ".fai")
self.helper.remove_tmp_dir(args_snp.fastas)
self._remove_bams(bam_datas, args_snp)
log.write("Remove all the temporary files.\n")
Example 14: CircRNADetection
# Required import: from annogesiclib.helper import Helper [as alias]
# Or: from annogesiclib.helper.Helper import remove_tmp_dir [as alias]
#......... part of the code is omitted here .........
if not os.path.exists(file_):
print("Error: some files in --bam_files or "
"--read_files do not exist!")
sys.exit()
input_datas.append({"sample": datas[0],
"files": datas[-1].split(",")})
return input_datas
def _combine_read_bam(self, bam_files, bam_datas, read_datas):
if bam_datas is not None:
for bam_data in bam_datas:
for read_data in read_datas:
if bam_data["sample"] == read_data["sample"]:
for read in read_data["files"]:
prefix = ".".join(
read.split("/")[-1].split(".")[:-1])
bam = os.path.join(self.alignment_path,
prefix + ".bam")
if (bam in bam_files) and (
bam not in bam_data["files"]):
bam_data["files"].append(bam)
else:
bam_datas = []
for read_data in read_datas:
bam_files = []
for read in read_data["files"]:
prefix = ".".join(
read.split("/")[-1].split(".")[:-1])
bam_files.append(os.path.join(
self.alignment_path, prefix + ".bam"))
bam_datas.append({"sample": read_data["sample"],
"files": bam_files})
return bam_datas
def _remove_tmp_files(self, args_circ, fa_prefixs):
self.helper.remove_tmp_dir(args_circ.fastas)
self.helper.remove_tmp_dir(args_circ.gffs)
self.helper.remove_all_content(args_circ.output_folder,
".bam", "file")
for prefix in fa_prefixs:
shutil.rmtree(os.path.join(args_circ.output_folder, prefix))
def run_circrna(self, args_circ, log):
'''detection of circRNA'''
bam_datas = None
read_datas = None
if (args_circ.bams is None) and (args_circ.read_files is None):
log.write("--bam_files and --read_files cannot both be empty.\n")
print("Error: --bam_files or --read_files should be assigned.")
sys.exit()
if args_circ.bams is not None:
bam_datas = self._extract_input_files(args_circ.bams)
if args_circ.read_files is not None:
read_datas = self._extract_input_files(args_circ.read_files)
for gff in os.listdir(args_circ.gffs):
if gff.endswith(".gff"):
self.helper.check_uni_attributes(os.path.join(
args_circ.gffs, gff))
if args_circ.segemehl_path is None:
log.write("segemehl does not exist.\n")
print("Error: please assign segemehl path!!")
sys.exit()
self.multiparser.parser_fasta(args_circ.fastas)
self.multiparser.parser_gff(args_circ.gffs, None)
self.multiparser.combine_gff(args_circ.fastas, self.gff_path,
"fasta", None)
tmp_reads = []
if args_circ.read_files:
log.write("Raw read files are found.\n")
tmp_reads = self._deal_zip_file(read_datas, log)
align_files, prefixs = self._align(args_circ, tmp_reads, log)
else:
align_files = None
prefixs = []
for fasta in os.listdir(self.fasta_path):
if fasta.endswith(".fa"):
fasta_prefix = fasta.replace(".fa", "")
prefixs.append(fasta_prefix)
for prefix in prefixs:
if args_circ.read_files:
sub_alignment_path = os.path.join(self.alignment_path, prefix)
bam_files, convert_ones, remove_ones = self._convert_sam2bam(
sub_alignment_path, args_circ.samtools_path, align_files, log)
else:
convert_ones = []
remove_ones = []
self._merge_sort_aligment_file(
bam_datas, read_datas, args_circ.samtools_path,
args_circ.output_folder,
convert_ones, tmp_reads, remove_ones, prefix, log)
self._run_testrealign(prefix, args_circ.testrealign_path,
args_circ.output_folder, log)
samples, fa_prefixs = self._merge_bed(
args_circ.fastas, self.splice_path, args_circ.output_folder)
self._stat_and_gen_gff(fa_prefixs, samples, args_circ, log)
if len(tmp_reads) != 0:
for reads in tmp_reads:
for read in reads["zips"]:
os.remove(read)
self._remove_tmp_files(args_circ, fa_prefixs)
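Finally, a note to make the listings above easier to read: every example calls remove_tmp_dir on an input folder after a Multiparser step has produced a temporary working copy inside it, and several examples pass values that may be None (for example args_op.tsss in Example 6). A behavior consistent with that usage is sketched below; it is an inference for illustration only, not the actual annogesiclib.helper source:
# Inferred (not authoritative) behavior of Helper.remove_tmp_dir,
# based only on how the 14 examples above call it.
import os
import shutil

def remove_tmp_dir(folder):
    # Some callers pass optional inputs that may be None, so treat None as a no-op.
    if folder is None or not os.path.isdir(folder):
        return
    # Drop the temporary working copies that the parsing steps are assumed to
    # create inside the input folder (a "tmp" subfolder and "*_folder" copies).
    for name in os.listdir(folder):
        path = os.path.join(folder, name)
        if os.path.isdir(path) and (name == "tmp" or name.endswith("_folder")):
            shutil.rmtree(path)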