本文整理汇总了Python中annogesiclib.helper.Helper.merge_file方法的典型用法代码示例。如果您正苦于以下问题:Python Helper.merge_file方法的具体用法?Python Helper.merge_file怎么用?Python Helper.merge_file使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类annogesiclib.helper.Helper的用法示例。
在下文中一共展示了Helper.merge_file方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: TranscriptDetection
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import merge_file [as 别名]
#.........这里部分代码省略.........
final_gff = os.path.join(
self.gff_outfolder,
"_".join([strain, self.endfix_tran]))
shutil.move(frag_gff, final_gff)
log.write("\t" + final_gff + " is generated.\n")
elif args_tran.tex_wigs is not None:
for strain in strains:
tex_gff = os.path.join(
self.gff_outfolder, "_".join([strain, self.tex]))
final_gff = os.path.join(
self.gff_outfolder,
"_".join([strain, self.endfix_tran]))
shutil.move(tex_gff, final_gff)
log.write("\t" + final_gff + " is generated.\n")
def _post_modify(self, tas, args_tran):
'''modify the transcript by comparing with genome annotation'''
for ta in tas:
for gff in os.listdir(args_tran.gffs):
if (".gff" in gff) and (gff[:-4] == ta):
break
print("Modifying {0} by refering to {1}".format(ta, gff))
fill_gap(os.path.join(args_tran.gffs, gff),
os.path.join(self.tran_path,
"_".join([ta, self.endfix_tran])),
"overlap", self.tmps["overlap"], args_tran.modify)
fill_gap(os.path.join(args_tran.gffs, gff),
os.path.join(self.tran_path,
"_".join([ta, self.endfix_tran])),
"uni", self.tmps["uni"], args_tran.modify)
tmp_merge = os.path.join(self.gff_outfolder, self.tmps["merge"])
if self.tmps["merge"] in self.gff_outfolder:
os.remove(tmp_merge)
self.helper.merge_file(self.tmps["overlap"], tmp_merge)
self.helper.merge_file(self.tmps["uni"], tmp_merge)
tmp_out = os.path.join(self.gff_outfolder, "_".join(["tmp", ta]))
self.helper.sort_gff(tmp_merge, tmp_out)
os.remove(self.tmps["overlap"])
os.remove(self.tmps["uni"])
os.remove(tmp_merge)
final_out = os.path.join(self.gff_outfolder,
"_".join(["final", ta]))
longer_ta(tmp_out, args_tran.length, final_out)
shutil.move(final_out,
os.path.join(self.tmps["tran"],
"_".join([ta, self.endfix_tran])))
os.remove(tmp_out)
shutil.rmtree(self.gff_outfolder)
shutil.move(self.tmps["tran"], self.gff_outfolder)
def _remove_file(self, args_tran):
if "tmp_wig" in os.listdir(args_tran.out_folder):
shutil.rmtree(os.path.join(args_tran.out_folder, "tmp_wig"))
if "merge_wigs" in os.listdir(args_tran.out_folder):
shutil.rmtree(os.path.join(args_tran.out_folder, "merge_wigs"))
self.helper.remove_tmp_dir(args_tran.gffs)
self.helper.remove_tmp_dir(args_tran.compare_tss)
self.helper.remove_tmp_dir(args_tran.terms)
self.helper.remove_tmp(os.path.join(args_tran.out_folder, "gffs"))
self.helper.remove_tmp(self.gff_outfolder)
def _compare_term_tran(self, args_tran, log):
'''searching the associated terminator to transcript'''
if args_tran.terms is not None:
print("Comparing between terminators and transcripts")
self.multiparser.parser_gff(args_tran.terms, "term")
示例2: sRNATargetPrediction
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import merge_file [as 别名]
#.........这里部分代码省略.........
log.write("The following temporary files for storing results of {0} are "
"generated:\n".format(prefix))
for file_ in os.listdir(os.path.join(self.rnaplex_path, prefix)):
log.write("\t" + os.path.join(self.rnaplex_path, prefix, file_) + "\n")
return num_process
def _rna_plex(self, prefixs, args_tar, log):
log.write("Using RNAplex and RNAplfold to predict sRNA targets.\n")
log.write("Please make sure the version of Vienna RNA package is "
"at least 2.3.2.\n")
for prefix in prefixs:
print("Running RNAplfold of {0}".format(prefix))
self.helper.check_make_folder(
os.path.join(self.rnaplex_path, prefix))
rnaplfold_folder = os.path.join(self.rnaplex_path, prefix,
"RNAplfold")
os.mkdir(rnaplfold_folder)
self._run_rnaplfold(
args_tar.rnaplfold_path, "sRNA", args_tar.win_size_s,
args_tar.span_s, args_tar.unstr_region_rnaplex_s,
self.srna_seq_path, prefix, rnaplfold_folder, log)
self._run_rnaplfold(
args_tar.rnaplfold_path, "target", args_tar.win_size_t,
args_tar.span_t, args_tar.unstr_region_rnaplex_t,
self.target_seq_path, prefix, rnaplfold_folder, log)
num_process = self._run_rnaplex(prefix, rnaplfold_folder, args_tar, log)
rnaplex_file = os.path.join(self.rnaplex_path, prefix,
"_".join([prefix, "RNAplex.txt"]))
if ("_".join([prefix, "RNAplex.txt"]) in
os.listdir(os.path.join(self.rnaplex_path, prefix))):
os.remove(rnaplex_file)
for index in range(0, num_process):
log.write("Using helper.py to merge the temporary files.\n")
self.helper.merge_file(os.path.join(
self.rnaplex_path, prefix, "_".join([
prefix, "RNAplex", str(index) + ".txt"])),
rnaplex_file)
log.write("\t" + rnaplex_file + " is generated.\n")
self.helper.remove_all_content(os.path.join(
self.rnaplex_path, prefix), "_RNAplex_", "file")
self.fixer.fix_rnaplex(rnaplex_file, self.tmps["tmp"])
shutil.move(self.tmps["tmp"], rnaplex_file)
shutil.rmtree(rnaplfold_folder)
def _run_rnaup(self, num_up, processes, prefix, out_rnaup, out_log,
args_tar, log):
for index in range(1, num_up + 1):
out_tmp_up = open(os.path.join(
args_tar.out_folder, "".join([self.tmps["rnaup"],
str(index), ".txt"])), "w")
out_err = open(os.path.join(
args_tar.out_folder, "".join([self.tmps["log"],
str(index), ".txt"])), "w")
in_up = open(os.path.join(
args_tar.out_folder, "".join([self.tmps["tmp"],
str(index), ".fa"])), "r")
log.write(" ".join([args_tar.rnaup_path,
"-u", str(args_tar.unstr_region_rnaup),
"-o", "--interaction_first"]) + "\n")
p = Popen([args_tar.rnaup_path,
"-u", str(args_tar.unstr_region_rnaup),
"-o", "--interaction_first"],
stdin=in_up, stdout=out_tmp_up, stderr=out_err)
processes.append(p)
if len(processes) != 0:
time.sleep(5)
示例3: Ribos
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import merge_file [as 别名]
class Ribos(object):
def __init__(self, args_ribo):
    '''Create the helper objects and derive every path used by the class.

    All paths are built from the folders stored in ``args_ribo``.
    '''
    out_dir = args_ribo.out_folder
    self.multiparser = Multiparser()
    self.helper = Helper()
    self.gff_parser = Gff3Parser()
    # "tmp" sub-folders of the input folders
    self.gff_path = os.path.join(args_ribo.gffs, "tmp")
    self.tss_path = os.path.join(args_ribo.tsss, "tmp")
    self.tran_path = os.path.join(args_ribo.trans, "tmp")
    self.fasta_path = os.path.join(args_ribo.fastas, "tmp")
    # output folders
    self.stat_folder = os.path.join(out_dir, "statistics")
    self.gff_outfolder = os.path.join(out_dir, "gffs")
    self.table_folder = os.path.join(out_dir, "tables")
    self.scan_folder = os.path.join(out_dir, "scan_Rfam")
    self.ribos_rfam = os.path.join(args_ribo.database,
                                   "Rfam_riboswitch.cm")
    # temporary folders, one per intermediate file type
    self.tmp_files = {
        kind: os.path.join(out_dir, "tmp_" + kind)
        for kind in ("fasta", "scan", "table")}
    # file name suffixes of the tables and scan outputs
    self.suffixs = dict(csv="riboswitch.csv",
                        txt="riboswitch_prescan.txt",
                        re_txt="riboswitch_scan.txt",
                        re_csv="riboswitch_scan.csv")
def _run_infernal(self, args_ribo, seq, type_, prefix):
    '''Run ``cmscan`` on ``seq`` and return the path of its output file.

    Args:
        args_ribo: argument container; ``infernal_path`` and ``e_value``
            are read here.
        seq: path of the fasta file to scan.
        type_: key of ``self.suffixs`` selecting the output suffix.
        prefix: prefix of the output file name.

    Returns:
        Path of the file that received the standard output of ``cmscan``.
    '''
    scan_file = os.path.join(self.tmp_files["scan"],
                             "_".join([prefix, self.suffixs[type_]]))
    command = [os.path.join(args_ribo.infernal_path, "cmscan"), "--incE",
               str(args_ribo.e_value), "--acc", self.ribos_rfam, seq]
    # The context manager closes the handle even if the call raises.
    with open(scan_file, "w") as scan:
        call(command, stdout=scan)
    return scan_file
def _scan_extract_rfam(self, prefixs, args_ribo):
for gff in os.listdir(self.gff_path):
if gff.endswith(".gff"):
prefix = gff.replace(".gff", "")
first_seq = os.path.join(self.tmp_files["fasta"],
prefix + ".fa")
prefixs.append(prefix)
print("extracting seq of riboswitch candidates of {0}".format(
prefix))
extract_potential_rbs(
os.path.join(self.fasta_path, prefix + ".fa"),
os.path.join(self.gff_path, gff),
os.path.join(self.tss_path, prefix + "_TSS.gff"),
os.path.join(self.tran_path, prefix + "_transcript.gff"),
first_seq, args_ribo)
print("pre-scanning of {0}".format(prefix))
first_scan_file = self._run_infernal(args_ribo, first_seq,
"txt", prefix)
sec_seq = os.path.join(self.tmp_files["fasta"],
"_".join([prefix, "regenerate.fa"]))
first_table = os.path.join(
self.tmp_files["table"],
"_".join([prefix, self.suffixs["csv"]]))
regenerate_seq(first_scan_file, first_seq,
first_table, sec_seq)
print("scanning of {0}".format(prefix))
sec_scan_file = self._run_infernal(args_ribo, sec_seq,
"re_txt", prefix)
sec_table = os.path.join(
self.tmp_files["table"],
"_".join([prefix, self.suffixs["re_csv"]]))
reextract_rbs(sec_scan_file, first_table, sec_table)
shutil.move(sec_table, first_table)
modify_table(first_table, args_ribo.output_all)
return prefixs
def _merge_results(self, args_ribo):
for gff in os.listdir(args_ribo.gffs):
if gff.endswith(".gff"):
prefix = gff.replace(".gff", "")
print("Merge results of {0}".format(prefix))
pre_strain = ""
self.helper.check_make_folder(os.path.join(
self.scan_folder, prefix))
fh = open(os.path.join(args_ribo.gffs, gff))
for entry in self.gff_parser.entries(fh):
if entry.seq_id != pre_strain:
if len(pre_strain) == 0:
shutil.copyfile(os.path.join(
self.tmp_files["table"],
"_".join([entry.seq_id, self.suffixs["csv"]])),
os.path.join(
self.table_folder,
"_".join([prefix, self.suffixs["csv"]])))
else:
self.helper.merge_file(os.path.join(
self.tmp_files["table"],
"_".join([entry.seq_id, self.suffixs["csv"]])),
os.path.join(
self.table_folder,
"_".join([prefix, self.suffixs["csv"]])))
shutil.copy(os.path.join(
self.tmp_files["scan"],
#.........这里部分代码省略.........
示例4: TranscriptAssembly
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import merge_file [as 别名]
#.........这里部分代码省略.........
for strain in strains:
frag_gff = os.path.join(
self.gff_outfolder, "_".join([strain, self.frag]))
final_gff = os.path.join(
self.gff_outfolder,
"_".join([strain, self.endfix_tran]))
shutil.move(frag_gff, final_gff)
elif args_tran.tex_wigs is not None:
for strain in strains:
tex_gff = os.path.join(
self.gff_outfolder, "_".join([strain, self.tex]))
final_gff = os.path.join(
self.gff_outfolder,
"_".join([strain, self.endfix_tran]))
shutil.move(tex_gff, final_gff)
def _post_modify(self, tas, args_tran):
for ta in tas:
for gff in os.listdir(args_tran.gffs):
if (".gff" in gff) and (gff[:-4] == ta):
break
print("Modifying {0} refering to {1}...".format(ta, gff))
fill_gap(os.path.join(args_tran.gffs, gff),
os.path.join(self.tran_path,
"_".join([ta, self.endfix_tran])),
"overlap", self.tmps["overlap"])
fill_gap(os.path.join(args_tran.gffs, gff),
os.path.join(self.tran_path,
"_".join([ta, self.endfix_tran])),
"uni", self.tmps["uni"])
tmp_merge = os.path.join(self.gff_outfolder, self.tmps["merge"])
if self.tmps["merge"] in self.gff_outfolder:
os.remove(tmp_merge)
self.helper.merge_file(self.tmps["overlap"], tmp_merge)
self.helper.merge_file(self.tmps["uni"], tmp_merge)
tmp_out = os.path.join(self.gff_outfolder, "_".join(["tmp", ta]))
self.helper.sort_gff(tmp_merge, tmp_out)
os.remove(self.tmps["overlap"])
os.remove(self.tmps["uni"])
os.remove(tmp_merge)
final_out = os.path.join(self.gff_outfolder,
"_".join(["final", ta]))
longer_ta(tmp_out, args_tran.length, final_out)
shutil.move(final_out,
os.path.join(self.tmps["tran"],
"_".join([ta, self.endfix_tran])))
os.remove(tmp_out)
shutil.rmtree(self.gff_outfolder)
shutil.move(self.tmps["tran"], self.gff_outfolder)
def _remove_file(self, args_tran):
if args_tran.frag_wigs is not None:
self.helper.remove_wigs(args_tran.frag_wigs)
if args_tran.tex_wigs is not None:
self.helper.remove_wigs(args_tran.tex_wigs)
if args_tran.gffs is not None:
self.helper.remove_tmp(args_tran.gffs)
if args_tran.compare_cds is not None:
self.helper.remove_tmp(args_tran.compare_cds)
if args_tran.compare_tss is not None:
self.helper.remove_tmp(args_tran.compare_tss)
if args_tran.terms is not None:
self.helper.remove_tmp(args_tran.terms)
self.helper.remove_tmp(os.path.join(args_tran.out_folder, "gffs"))
self.helper.remove_tmp(self.gff_outfolder)
示例5: GoTermFinding
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import merge_file [as 别名]
class GoTermFinding(object):
'''Retrieving the GO term'''
def __init__(self, args_go):
    '''Create the helper objects and derive the output and input paths.

    ``self.tran_path`` is ``None`` when no transcript folder was given.
    '''
    self.multiparser = Multiparser()
    self.helper = Helper()
    out_dir = args_go.out_folder
    # one branch of the output tree per CDS set
    self.out_all = os.path.join(out_dir, "all_CDSs")
    self.out_express = os.path.join(out_dir, "expressed_CDSs")
    self.result_all_path = os.path.join(self.out_all, "GO_term_results")
    self.result_express_path = os.path.join(self.out_express,
                                            "GO_term_results")
    self.gff_path = os.path.join(args_go.gffs, "tmp")
    self.tran_path = (os.path.join(args_go.trans, "tmp")
                      if args_go.trans is not None else None)
    self.stat_all_path = os.path.join(self.out_all, "statistics")
    self.stat_express_path = os.path.join(self.out_express, "statistics")
    self.all_strain = "all_genomes_uniprot.csv"
def _retrieve_go(self, uniprot, out_path, type_, log):
prefixs = []
log.write("Running gene_ontology.py to retrieve GO terms.\n")
for gff in os.listdir(self.gff_path):
prefix = gff.replace(".gff", "")
prefixs.append(prefix)
self.helper.check_make_folder(os.path.join(out_path, prefix))
out_file = os.path.join(out_path, prefix,
"_".join([prefix, "uniprot.csv"]))
print("Extracting GO terms of {0} from UniProt".format(prefix))
if self.tran_path is not None:
tran_file = os.path.join(self.tran_path,
"_".join([prefix, "transcript.gff"]))
else:
tran_file = None
retrieve_uniprot(uniprot, os.path.join(self.gff_path, gff),
out_file, tran_file, type_)
log.write("\t" + out_file + " is generated.\n")
def _remove_header(self, out_all):
out = open(out_all + "_tmp", "w")
fh = open(out_all, "r")
out.write("\t".join(["Genome", "Strand", "Start", "End",
"Protein_id", "Go_term"]) + "\n")
for row in csv.reader(fh, delimiter='\t'):
if row[0] != "Genome":
out.write("\t".join(row) + "\n")
out.close()
fh.close()
shutil.move(out_all + "_tmp", out_all)
def _merge_files(self, gffs, out_path, out_folder, log):
'''merge the files according to the input genome folder'''
folders = []
log.write("Merging the output files based on the input genome "
"information.\n")
for folder in os.listdir(gffs):
if folder.endswith("gff_folder"):
folder_prefix = folder.replace(".gff_folder", "")
folder_path = os.path.join(out_folder, folder_prefix)
self.helper.check_make_folder(folder_path)
folders.append(folder_path)
filenames = []
for gff in os.listdir(os.path.join(gffs, folder)):
if gff.endswith(".gff"):
filenames.append(gff.replace(".gff", ""))
out_all = os.path.join(folder_path, self.all_strain)
if len(filenames) > 1:
if self.all_strain in os.listdir(folder_path):
os.remove(out_all)
for filename in filenames:
csv_file = "_".join([filename, "uniprot.csv"])
self.helper.merge_file(os.path.join(out_path,
filename, csv_file), out_all)
self._remove_header(out_all)
shutil.copy(os.path.join(out_path, filename, csv_file),
folder_path)
else:
shutil.copyfile(os.path.join(out_path, filenames[0],
"_".join([filenames[0], "uniprot.csv"])),
out_all)
self.helper.remove_all_content(out_path, None, "dir")
self.helper.remove_all_content(out_path, None, "file")
for folder in folders:
folder_prefix = folder.split("/")[-1]
shutil.move(folder, os.path.join(out_path, folder_prefix))
for file_ in os.listdir(os.path.join(out_path, folder_prefix)):
log.write("\t" + os.path.join(out_path, folder_prefix, file_) +
" is generated.\n")
def _stat(self, out_path, stat_path, go, goslim, out_folder, log):
log.write("Running gene_ontology.py to Retrieve GOslim terms and "
"do statistics.\n")
log.write("The following files are generated:\n")
for folder in os.listdir(out_path):
strain_stat_path = os.path.join(stat_path, folder)
self.helper.check_make_folder(strain_stat_path)
fig_path = os.path.join(strain_stat_path, "figs")
if "fig" not in os.listdir(strain_stat_path):
#.........这里部分代码省略.........
示例6: Ribos
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import merge_file [as 别名]
#.........这里部分代码省略.........
log.write("Running recompute_RBS.py and modify_rbs_table.py "
"to generate tables for {0} "
"based on the scanning results.\n".format(prefix))
reextract_rbs(sec_scan_file, first_table, sec_table,
args_ribo.cutoff)
shutil.move(sec_table, first_table)
modify_table(first_table, args_ribo.output_all)
return prefixs
def _merge_results(self, args_ribo, scan_folder, suffixs, tmp_files,
table_folder, stat_folder, feature_id, gff_outfolder,
feature, log):
'''merge the results from the results of two searching'''
for gff in os.listdir(args_ribo.gffs):
if gff.endswith(".gff"):
prefix = gff.replace(".gff", "")
print("Merging results of {0}".format(prefix))
pre_strain = ""
self.helper.check_make_folder(os.path.join(
scan_folder, prefix))
fh = open(os.path.join(args_ribo.gffs, gff))
log.write("Merging the results from Infernal to generate "
"tables for {0}.\n".format(prefix))
for entry in self.gff_parser.entries(fh):
if entry.seq_id != pre_strain:
if len(pre_strain) == 0:
shutil.copyfile(os.path.join(
tmp_files["table"],
"_".join([entry.seq_id, suffixs["csv"]])),
os.path.join(
table_folder,
"_".join([prefix, suffixs["csv"]])))
else:
self.helper.merge_file(os.path.join(
tmp_files["table"],
"_".join([entry.seq_id, suffixs["csv"]])),
os.path.join(
table_folder,
"_".join([prefix, suffixs["csv"]])))
shutil.copy(os.path.join(
tmp_files["scan"],
"_".join([entry.seq_id, suffixs["txt"]])),
os.path.join(scan_folder, prefix))
shutil.copy(os.path.join(
tmp_files["scan"],
"_".join([entry.seq_id, suffixs["re_txt"]])),
os.path.join(scan_folder, prefix))
pre_strain = entry.seq_id
log.write("The following files are generated.\n")
for folder in (table_folder, scan_folder):
for file_ in os.listdir(folder):
log.write("\t" + os.path.join(folder, file_) + "\n")
out_stat = os.path.join(
stat_folder,
"_".join(["stat", prefix, feature + ".txt"]))
print("Computing statistics of {0}".format(prefix))
log.write("Running ribo_gff.py to do statistics and generate "
"gff files for {0}.\n".format(prefix))
log.write("The following files are generated:\n")
out_gff = os.path.join(gff_outfolder, "_".join([
prefix, feature + ".gff"]))
stat_and_covert2gff(os.path.join(
table_folder, "_".join([prefix, suffixs["csv"]])),
feature_id, out_gff,
args_ribo.fuzzy, out_stat, feature)
log.write("\t" + out_gff + "\n")
示例7: MEME
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import merge_file [as 别名]
#.........这里部分代码省略.........
"n", os.path.join(input_path, fasta)]) + "\n")
call([args_pro.glam2_path,
"-O", os.path.join(glam_folder, folder), "-a",
str(min_width), "-b", str(max_width), "-r",
str(args_pro.num_motif), "-n", str(args_pro.end_run),
"n", os.path.join(input_path, fasta)])
def _get_fasta_file(self, fasta_path, prefix):
for fasta in os.listdir(fasta_path):
if (fasta.endswith(".fa")) and \
(prefix == fasta.replace(".fa", "")):
break
elif (fasta.endswith(".fna")) and \
(prefix == fasta.replace(".fna", "")):
break
elif (fasta.endswith(".fasta")) and \
(prefix == fasta.replace(".fasta", "")):
break
return fasta
def _check_gff(self, gffs):
for gff in os.listdir(gffs):
if gff.endswith(".gff"):
self.helper.check_uni_attributes(os.path.join(gffs, gff))
def _move_and_merge_fasta(self, input_path, prefix):
    '''Build the combined fasta files and move all of them to ``input_path``.

    Two combined files are produced with ``del_repeat_fasta``: one without
    and one with the "orph" sequences.  Every file is then moved to
    ``<input_path>/<prefix>_allgenome_<type>.fa``.
    '''
    fastas = self.fastas
    all_type = os.path.join(self.tmp_folder, fastas["all"])
    all_no_orph = os.path.join(self.tmp_folder, fastas["all_no_orph"])
    # drop results left in the temporary folder
    for key, leftover in (("all", all_type),
                          ("all_no_orph", all_no_orph)):
        if fastas[key] in os.listdir(self.tmp_folder):
            os.remove(leftover)
    # pri + sec + inter + anti, then the same plus orph
    shutil.copyfile(fastas["pri"], fastas["tmp_fa"])
    for key in ("sec", "inter", "anti"):
        self.helper.merge_file(fastas[key], fastas["tmp_fa"])
    shutil.copyfile(fastas["tmp_fa"], fastas["tmp_all"])
    self.helper.merge_file(fastas["orph"], fastas["tmp_all"])
    del_repeat_fasta(fastas["tmp_fa"], all_no_orph)
    del_repeat_fasta(fastas["tmp_all"], all_type)
    for key in ("tmp_fa", "tmp_all"):
        os.remove(fastas[key])
    out_prefix = os.path.join(input_path, prefix)
    destinations = ((fastas["pri"], "allgenome_primary.fa"),
                    (fastas["sec"], "allgenome_secondary.fa"),
                    (fastas["inter"], "allgenome_internal.fa"),
                    (fastas["anti"], "allgenome_antisense.fa"),
                    (fastas["orph"], "allgenome_orphan.fa"),
                    (all_type, "allgenome_all_types.fa"),
                    (all_no_orph, "allgenome_without_orphan.fa"))
    for source, label in destinations:
        shutil.move(source, "_".join([out_prefix, label]))
def _split_fasta_by_strain(self, input_path):
for fasta in os.listdir(input_path):
if "allgenome" not in fasta:
os.remove(os.path.join(input_path, fasta))
out = None
for fasta in os.listdir(input_path):
if fasta.endswith(".fa"):
pre_strain = ""
示例8: SubLocal
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import merge_file [as 别名]
#.........这里部分代码省略.........
tmp_psortb_path, "_".join([prefix, self.endfix_raw])),
os.path.join(tmp_psortb_path, "_".join([
prefix, self.endfix_table])),
None, None, args_sub.fuzzy)
log.write("\t" + os.path.join(tmp_psortb_path, "_".join([
prefix, self.endfix_table])) + " is tempoaray generated.\n")
def _remove_header(self, out_all):
out = open(out_all + "_tmp", "w")
fh = open(out_all, "r")
out.write("\t".join(["#Genome", "Protein", "Strand", "Start",
"End", "Location", "Score"]) + "\n")
for row in csv.reader(fh, delimiter='\t'):
if row[0] != "#Genome":
out.write("\t".join(row) + "\n")
out.close()
fh.close()
shutil.move(out_all + "_tmp", out_all)
def _merge_and_stat(self, gffs, tmp_psortb_path, stat_path, psortb_result,
log):
for folder in os.listdir(gffs):
if folder.endswith(".gff_folder"):
prefix = folder.replace(".gff_folder", "")
self.helper.check_make_folder(
os.path.join(psortb_result, prefix))
merge_table = os.path.join(
psortb_result, prefix,
"_".join([prefix, self.endfix_table]))
for gff in os.listdir(os.path.join(gffs, folder)):
result = self.helper.get_correct_file(
tmp_psortb_path, "_" + self.endfix_raw,
gff.replace(".gff", ""), None, None)
shutil.copy(result, os.path.join(psortb_result, prefix))
result = self.helper.get_correct_file(
tmp_psortb_path, "_" + self.endfix_table,
gff.replace(".gff", ""), None, None)
self.helper.merge_file(result, merge_table)
log.write("\t" + merge_table + "\n")
self._remove_header(merge_table)
self.helper.check_make_folder(os.path.join(stat_path, prefix))
stat_folder = os.path.join(stat_path, prefix)
stat_file = os.path.join(stat_folder, "_".join([
"stat", prefix, "sublocal.csv"]))
stat_sublocal(merge_table,
os.path.join(stat_folder, prefix),
stat_file)
for file_ in os.listdir(stat_folder):
log.write("\t" + os.path.join(stat_folder, file_) + "\n")
def _remove_tmps(self, args_sub):
self.helper.remove_tmp_dir(args_sub.fastas)
self.helper.remove_tmp_dir(args_sub.gffs)
self.helper.remove_all_content(args_sub.out_folder, "tmp", "dir")
self.helper.remove_all_content(self.out_all, "tmp", "dir")
self.helper.remove_all_content(self.out_express, "tmp", "dir")
os.remove(os.path.join(self.out_all, "tmp_log"))
if args_sub.trans is not None:
os.remove(os.path.join(self.out_express, "tmp_log"))
self.helper.remove_tmp_dir(args_sub.trans)
def run_sub_local(self, args_sub, log):
# Entry point of the class: checks and parses the inputs, processes every
# file of self.gff_path, merges the results and removes temporary data.
# NOTE(review): the indentation of this listing was lost, so the nesting of
# the statements below (in particular which folder creations belong to the
# "trans is not None" branch) cannot be confirmed from here.
# Check the attributes of every .gff file of the input folder.
for gff in os.listdir(args_sub.gffs):
if gff.endswith(".gff"):
self.helper.check_uni_attributes(os.path.join(
args_sub.gffs, gff))
# Parse the annotation and fasta folders (and the transcripts if given).
self.multiparser.parser_gff(args_sub.gffs, None)
self.multiparser.parser_fasta(args_sub.fastas)
if args_sub.trans is not None:
self.multiparser.parser_gff(args_sub.trans, "transcript")
self.helper.check_make_folder(self.express_tmp_path)
self.helper.check_make_folder(self.express_tmp_result)
self.helper.check_make_folder(self.all_tmp_path)
self.helper.check_make_folder(self.all_tmp_result)
# Per file: get the protein sequences, run psortb and extract the result;
# the "expressed" variant uses self.tran_path, the "all" variant passes None.
for gff in os.listdir(self.gff_path):
if args_sub.trans is not None:
print("Running expressed genes now")
prefix = self._get_protein_seq(gff, self.express_tmp_path,
self.tran_path, args_sub, log)
self._run_psortb(args_sub, prefix, self.out_express,
self.express_tmp_path,
self.express_tmp_result, log)
self._extract_result(args_sub, self.express_tmp_result, prefix,
os.path.join(self.gff_path, gff), log)
print("Running all genes now")
prefix = self._get_protein_seq(gff, self.all_tmp_path, None,
args_sub, log)
self._run_psortb(args_sub, prefix, self.out_all,
self.all_tmp_path, self.all_tmp_result, log)
self._extract_result(args_sub, self.all_tmp_result, prefix,
os.path.join(self.gff_path, gff), log)
log.write("Running stat_sublocal.py to do statistics, generate "
"merged tables, and plot figures.\n")
log.write("The following files are generated:\n")
# Merge and compute statistics for the "all" results, and for the
# "expressed" results when transcripts were given.
self._merge_and_stat(args_sub.gffs, self.all_tmp_result,
self.all_stat_path, self.all_result, log)
if args_sub.trans is not None:
self._merge_and_stat(args_sub.gffs, self.express_tmp_result,
self.express_stat_path, self.express_result, log)
self._remove_tmps(args_sub)
示例9: SubLocal
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import merge_file [as 别名]
#.........这里部分代码省略.........
self._psortb(args_sub.psortb_path, "-p", prot_seq_file,
out_raw, out_err)
elif args_sub.gram == "negative":
self._psortb(args_sub.psortb_path, "-n", prot_seq_file,
out_raw, out_err)
else:
print("Error:It is not a proper bacteria type - {0}!!".format(
args_sub.gram))
sys.exit()
out_err.close()
out_raw.close()
def _extract_result(self, args_sub, tmp_psortb_path, prefix, gff_file):
    '''Convert the raw result of ``prefix`` into a table.

    With ``args_sub.merge`` set, ``extract_psortb`` additionally receives
    ``gff_file`` and writes ``<prefix>.gff``, which then replaces
    ``gff_file``.
    '''
    if args_sub.merge:
        print("Merge to gff...")
    raw_file = os.path.join(tmp_psortb_path,
                            "_".join([prefix, self.endfix_raw]))
    table_file = os.path.join(tmp_psortb_path,
                              "_".join([prefix, self.endfix_table]))
    if not args_sub.merge:
        extract_psortb(raw_file, table_file, None, None, args_sub.fuzzy)
        return
    merged_gff = prefix + ".gff"
    extract_psortb(raw_file, table_file, gff_file, merged_gff,
                   args_sub.fuzzy)
    shutil.move(merged_gff, gff_file)
def _merge_and_stat(self, gffs, tmp_psortb_path, stat_path, psortb_result):
for folder in os.listdir(gffs):
if folder.endswith(".gff_folder"):
prefix = folder.replace(".gff_folder", "")
self.helper.check_make_folder(
os.path.join(psortb_result, prefix))
merge_table = os.path.join(
psortb_result, prefix,
"_".join([prefix, self.endfix_table]))
for gff in os.listdir(os.path.join(gffs, folder)):
result = self.helper.get_correct_file(
tmp_psortb_path, "_" + self.endfix_raw,
gff.replace(".gff", ""), None, None)
shutil.copy(result, os.path.join(psortb_result, prefix))
result = self.helper.get_correct_file(
tmp_psortb_path, "_" + self.endfix_table,
gff.replace(".gff", ""), None, None)
self.helper.merge_file(result, merge_table)
self.helper.check_make_folder(os.path.join(stat_path, prefix))
stat_sublocal(merge_table,
os.path.join(
stat_path, prefix, prefix),
os.path.join(
stat_path, prefix, "_".join([
"stat", prefix, "sublocal.csv"])))
def _remove_tmps(self, args_sub):
self.helper.remove_tmp(args_sub.fastas)
self.helper.remove_tmp(args_sub.gffs)
self.helper.remove_all_content(args_sub.out_folder, "tmp", "dir")
self.helper.remove_all_content(self.out_all, "tmp", "dir")
self.helper.remove_all_content(self.out_express, "tmp", "dir")
os.remove(os.path.join(self.out_all, "tmp_log"))
if args_sub.trans is not None:
os.remove(os.path.join(self.out_express, "tmp_log"))
def run_sub_local(self, args_sub):
# Entry point of the class: checks and parses the inputs, processes every
# file of self.gff_path, merges the results and removes temporary data.
# NOTE(review): the indentation of this listing was lost, so the nesting of
# the statements below (in particular which folder creations belong to the
# "trans is not None" branch) cannot be confirmed from here.
# Check the attributes of every .gff file of the input folder.
for gff in os.listdir(args_sub.gffs):
if gff.endswith(".gff"):
self.helper.check_uni_attributes(os.path.join(
args_sub.gffs, gff))
# Parse the annotation and fasta folders (and the transcripts if given).
self.multiparser.parser_gff(args_sub.gffs, None)
self.multiparser.parser_fasta(args_sub.fastas)
if args_sub.trans is not None:
self.multiparser.parser_gff(args_sub.trans, "transcript")
self.helper.check_make_folder(self.express_tmp_path)
self.helper.check_make_folder(self.express_tmp_result)
self.helper.check_make_folder(self.all_tmp_path)
self.helper.check_make_folder(self.all_tmp_result)
# Per file: get the protein sequences, run psortb and extract the result;
# the "expressed" variant uses self.tran_path, the "all" variant passes None.
for gff in os.listdir(self.gff_path):
if args_sub.trans is not None:
print("Running expressed gene now...")
prefix = self._get_protein_seq(gff, self.express_tmp_path,
self.tran_path)
self._run_psortb(args_sub, prefix, self.out_express,
self.express_tmp_path,
self.express_tmp_result)
self._extract_result(args_sub, self.express_tmp_result, prefix,
os.path.join(self.gff_path, gff))
print("Running all gene now...")
prefix = self._get_protein_seq(gff, self.all_tmp_path, None)
self._run_psortb(args_sub, prefix, self.out_all,
self.all_tmp_path, self.all_tmp_result)
self._extract_result(args_sub, self.all_tmp_result, prefix,
os.path.join(self.gff_path, gff))
# Merge and compute statistics for the "all" results, and for the
# "expressed" results when transcripts were given.
self._merge_and_stat(args_sub.gffs, self.all_tmp_result,
self.all_stat_path, self.all_result)
if args_sub.trans is not None:
self._merge_and_stat(args_sub.gffs, self.express_tmp_result,
self.express_stat_path, self.express_result)
self._remove_tmps(args_sub)
示例10: sRNATargetPrediction
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import merge_file [as 别名]
#.........这里部分代码省略.........
"-l", str(args_tar.inter_length),
"-e", str(args_tar.energy),
"-z", str(args_tar.duplex_dist),
"-a", rnaplfold_path], stdout=out_rnaplex)
processes.append(p)
if num_process % args_tar.core_plex == 0:
self._wait_process(processes)
self._wait_process(processes)
return num_process
def _rna_plex(self, prefixs, args_tar):
for prefix in prefixs:
print("Running RNAplfold of {0}".format(prefix))
self.helper.check_make_folder(
os.path.join(self.rnaplex_path, prefix))
rnaplfold_path = os.path.join(self.rnaplex_path, prefix,
"RNAplfold")
os.mkdir(rnaplfold_path)
self._run_rnaplfold(
args_tar.vienna_path, "sRNA", args_tar.win_size_s,
args_tar.span_s, args_tar.unstr_region_rnaplex_s,
self.srna_seq_path, prefix, rnaplfold_path)
self._run_rnaplfold(
args_tar.vienna_path, "target", args_tar.win_size_t,
args_tar.span_t, args_tar.unstr_region_rnaplex_t,
self.target_seq_path, prefix, rnaplfold_path)
num_process = self._run_rnaplex(prefix, rnaplfold_path, args_tar)
rnaplex_file = os.path.join(self.rnaplex_path, prefix,
"_".join([prefix, "RNAplex.txt"]))
if ("_".join([prefix, "RNAplex.txt"]) in
os.listdir(os.path.join(self.rnaplex_path, prefix))):
os.remove(rnaplex_file)
for index in range(0, num_process):
self.helper.merge_file(os.path.join(
self.rnaplex_path, prefix, "_".join([
prefix, "RNAplex", str(index) + ".txt"])),
rnaplex_file)
self.helper.remove_all_content(os.path.join(
self.rnaplex_path, prefix), "_RNAplex_", "file")
self.fixer.fix_rnaplex(rnaplex_file, self.tmps["tmp"])
shutil.move(self.tmps["tmp"], rnaplex_file)
def _run_rnaup(self, num_up, processes, out_rnaup, out_log, args_tar):
for index in range(1, num_up + 1):
out_tmp_up = open(os.path.join(
args_tar.out_folder, "".join([self.tmps["rnaup"],
str(index), ".txt"])), "w")
out_err = open(os.path.join(
args_tar.out_folder, "".join([self.tmps["log"],
str(index), ".txt"])), "w")
in_up = open(os.path.join(
args_tar.out_folder, "".join([self.tmps["tmp"],
str(index), ".fa"])), "r")
p = Popen([os.path.join(args_tar.vienna_path, "RNAup"),
"-u", str(args_tar.unstr_region_rnaup),
"-o", "--interaction_first"],
stdin=in_up, stdout=out_tmp_up, stderr=out_err)
processes.append(p)
if len(processes) != 0:
time.sleep(5)
self._wait_process(processes)
os.system("rm " + os.path.join(args_tar.out_folder,
self.tmps["all_fa"]))
self._merge_txt(num_up, out_rnaup, out_log, args_tar.out_folder)
os.system("rm " + os.path.join(args_tar.out_folder,
self.tmps["all_txt"]))
示例11: RATT
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import merge_file [as 别名]
#.........这里部分代码省略.........
"genbank or embl files!")
log.write("The strain names in --compare_pair should be the same "
"as the strain names in fasta, genbank, or embl files.\n")
sys.exit()
log.write("Make sure your RATT version is at least 1.64.\n")
log.write("If the RATT can not run properly, please check the "
"RATT_HOME and PAGIT_HOME is assigned correctly.\n")
log.write(" ".join([args_ratt.ratt_path, self.embl,
os.path.join(self.tmp_files["tar"], tar + ".fa"),
args_ratt.element, args_ratt.transfer_type,
os.path.join(self.tmp_files["ref"], ref + ".fa")]) + "\n")
call([args_ratt.ratt_path, self.embl,
os.path.join(self.tmp_files["tar"], tar + ".fa"),
args_ratt.element, args_ratt.transfer_type,
os.path.join(self.tmp_files["ref"], ref + ".fa")],
stdout=out, stderr=DEVNULL)
log.write("Done!\n")
def _format_and_run(self, args_ratt, log):
    '''Run RATT for every "reference:target" pair and tidy up its output.

    args_ratt.pairs holds strings of the form "ref:tar". For each pair
    self._run_ratt is called with a freshly opened self.ratt_log handle.
    Afterwards the current working directory is scanned: entries whose
    name contains "final" are moved to args_ratt.output_path, entries
    whose name contains args_ratt.element, "query", "Reference", "Query"
    or "Sequences" are deleted. Every touched entry is listed in ``log``.
    '''
    # NOTE(review): the listing lost its indentation; the clean-up scan
    # is assumed to run once per pair -- confirm against upstream.
    print("Running RATT")
    leftovers = (args_ratt.element, "query", "Reference", "Query",
                 "Sequences")
    for pair in args_ratt.pairs:
        names = pair.split(":")
        ref = names[0]
        tar = names[1]
        # "w+" truncates the log for every pair (unchanged behaviour).
        # The with-block closes each handle; before, only the handle of
        # the last pair was closed and an empty pair list failed on the
        # undefined name ``out``.
        with open(self.ratt_log, "w+") as out:
            self._run_ratt(args_ratt, tar, ref, out, log)
        log.write("The following files are generatd:\n")
        for filename in os.listdir():
            if "final" in filename:
                log.write("\t" + filename + "\n")
                shutil.move(filename, os.path.join(args_ratt.output_path,
                                                   filename))
            elif any(tag in filename for tag in leftovers):
                log.write("\t" + filename + "\n")
                if os.path.isfile(filename):
                    os.remove(filename)
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
# annotation_transfer: run the RATT annotation transfer, convert the
# resulting "final.embl" files and merge the converted gff/ptt/rnt files
# into one set per target fasta folder.
# NOTE(review): this listing lost its indentation, so the nesting of the
# statements below must be confirmed against the upstream source before
# the code is changed.
def annotation_transfer(self, args_ratt, log):
# Both fasta folders are handed to the multiparser (presumably this is
# what creates the "*_folder" sub-directories scanned further down --
# TODO confirm).
self.multiparser.parser_fasta(args_ratt.tar_fastas)
self.multiparser.parser_fasta(args_ratt.ref_fastas)
out_gbk = None
# Without EMBL references the input in args_ratt.ref_gbki is converted
# first; its result is later passed to _remove_files.
if args_ratt.ref_embls is None:
out_gbk = self._convert_embl(args_ratt.ref_gbki, log)
self._format_and_run(args_ratt, log)
files = []
# Convert every entry of the output folder whose name contains
# "final.embl".
for data in os.listdir(args_ratt.output_path):
if "final.embl" in data:
log.write("Running converter.py to convert embl "
"files in {0} to gff, ptt, and rnt format.\n".format(data))
self._convert_to_gff(data, args_ratt, files, log)
self._convert_to_pttrnt(args_ratt.gff_outfolder, files, log)
self.helper.check_make_folder(self.tmp_files["out_gff"])
# NOTE(review): ``data`` is the loop variable of the loop above; if this
# line runs after that loop it reports the last listed entry and fails
# with a NameError for an empty output folder -- confirm.
log.write("Merging the output of {0}.\n".format(data))
for folder in os.listdir(args_ratt.tar_fastas):
files = []
if "_folder" in folder:
datas = folder.split("_folder")
# Text before "_folder" with its last dot-separated part removed.
prefix = ".".join(datas[0].split(".")[:-1])
for file_ in os.listdir(os.path.join(args_ratt.tar_fastas,
folder)):
# Drops the last three characters of the file name (presumably the
# ".fa" extension -- TODO confirm).
files.append(file_[:-3])
# Append every converted file whose name without its last four
# characters equals one of the collected names to the temporary
# gff / ptt / rnt file.
for gff in os.listdir(args_ratt.gff_outfolder):
for file_ in files:
if (".gff" in gff) and (file_ == gff[:-4]):
self.helper.merge_file(os.path.join(
args_ratt.gff_outfolder, gff),
self.tmp_files["gff"])
if (".ptt" in gff) and (file_ == gff[:-4]):
self.helper.merge_file(os.path.join(
args_ratt.gff_outfolder, gff),
self.tmp_files["ptt"])
if (".rnt" in gff) and (file_ == gff[:-4]):
self.helper.merge_file(os.path.join(
args_ratt.gff_outfolder, gff),
self.tmp_files["rnt"])
# Only the merged gff is checked; all three temporary files are then
# moved into the out_gff folder under ``prefix``.
if os.path.exists(self.tmp_files["gff"]):
shutil.move(self.tmp_files["gff"], os.path.join(
self.tmp_files["out_gff"], prefix + ".gff"))
shutil.move(self.tmp_files["ptt"], os.path.join(
self.tmp_files["out_gff"], prefix + ".ptt"))
shutil.move(self.tmp_files["rnt"], os.path.join(
self.tmp_files["out_gff"], prefix + ".rnt"))
else:
# No merged gff exists: report the problem on stdout and in the log.
print("Error: Please check your fasta or "
"annotation files, they should only contain "
"the query genome. And make sure your RATT can "
"work properly (check $ANNOgesic/output/"
"annotation_transfer/ratt_log.txt).")
log.write("Please check your fasta or "
"annotation files, they should only contain "
"the query genome. And make sure your RATT can "
"work properly (check $ANNOgesic/output/"
"annotation_transfer/ratt_log.txt).\n")
self._remove_files(args_ratt, out_gbk, log)
示例12: MEME
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import merge_file [as 别名]
class MEME(object):
    '''Promoter motif detection with MEME.

    NOTE(review): the listing this class comes from omits further
    methods after _move_and_merge_fasta.
    '''

    def __init__(self, args_pro):
        '''Create the helper objects and derive all working paths.

        args_pro has to provide ``tsss``, ``gffs`` (may be None),
        ``output_folder`` and ``fastas``.
        '''
        self.multiparser = Multiparser()
        self.helper = Helper()
        self.tss_path = os.path.join(args_pro.tsss, "tmp")
        if args_pro.gffs is not None:
            self.gff_path = os.path.join(args_pro.gffs, "tmp")
        else:
            self.gff_path = None
        self.out_fasta = os.path.join(args_pro.output_folder, "fasta_class")
        self.tmp_folder = os.path.join(os.getcwd(), "tmp")
        # "all_no_orph" and "all" are bare file names; they are joined
        # with tmp_folder where they are used.
        self.fastas = {"pri": os.path.join(self.tmp_folder, "primary.fa"),
                       "sec": os.path.join(self.tmp_folder, "secondary.fa"),
                       "inter": os.path.join(self.tmp_folder, "internal.fa"),
                       "anti": os.path.join(self.tmp_folder, "antisense.fa"),
                       "orph": os.path.join(self.tmp_folder, "orphan.fa"),
                       "all_no_orph": "without_orphan.fa",
                       "all": "all_type.fa",
                       "tmp_fa": os.path.join(self.tmp_folder, "tmp.fa"),
                       "tmp_all": os.path.join(self.tmp_folder, "tmp_all.fa")}
        self.all_fasta = os.path.join(args_pro.fastas, "allfasta.fa")
        self.all_tss = os.path.join(self.tss_path, "allfasta_TSS.gff")

    def _run_normal_motif(self, input_path, out_path, filename,
                          fasta, width, args_pro):
        '''Run MEME with one fixed motif width.

        The run is skipped when the result folder
        "promoter_motifs_<filename>_<width>_nt" already exists in
        out_path.
        '''
        print(os.path.join(input_path, fasta))
        folder = "_".join(["promoter_motifs", filename,
                           str(width), "nt"])
        if folder not in os.listdir(out_path):
            call([args_pro.meme_path, "-maxsize", "1000000",
                  "-dna", "-nmotifs", str(args_pro.num_motif),
                  "-w", str(width), "-maxiter", "100",
                  "-evt", str(args_pro.e_value),
                  "-oc", os.path.join(out_path, folder),
                  os.path.join(input_path, fasta)])

    def _run_small_motif(self, input_path, out_path, filename,
                         fasta, width, args_pro):
        '''Run MEME with a width range given as the string "min-max".

        The run is skipped when the result folder
        "promoter_motifs_<filename>_<min>-<max>_nt" already exists in
        out_path.
        '''
        data = width.split("-")
        min_width = data[0]
        max_width = data[1]
        folder = "_".join(["promoter_motifs", filename,
                           "-".join([str(min_width), str(max_width)]), "nt"])
        if folder not in os.listdir(out_path):
            call([args_pro.meme_path, "-maxsize", "1000000",
                  "-dna", "-nmotifs", str(args_pro.num_motif),
                  "-minsites", "0", "-maxsites", "2",
                  "-minw", str(min_width), "-maxw", str(max_width),
                  "-maxiter", "100",
                  "-evt", str(args_pro.e_value),
                  "-oc", os.path.join(out_path, folder),
                  os.path.join(input_path, fasta)])

    def _get_fasta_file(self, fasta_path, prefix):
        '''Return the fasta file name in fasta_path that belongs to prefix.

        A file matches when its name is prefix followed by ".fa", ".fna"
        or ".fasta". Only the trailing extension is stripped for the
        comparison; the former str.replace call removed every occurrence
        of the extension text, so a name such as "E.faecalis.fa" could
        never match its prefix. Returns None when no file matches
        (previously the last directory entry was returned, or an
        UnboundLocalError was raised for an empty folder).
        '''
        for fasta in os.listdir(fasta_path):
            for suffix in (".fa", ".fna", ".fasta"):
                if fasta.endswith(suffix) and \
                        (fasta[:-len(suffix)] == prefix):
                    return fasta
        return None

    def _check_gff(self, gffs):
        '''Check the attributes of every ".gff" file in the folder gffs.'''
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _move_and_merge_fasta(self, input_path, prefix):
        '''Build the merged fasta files and move the per-class files.

        Two merged files are written to tmp_folder: one of all classes
        except orphan and one of all classes, both passed through
        del_repeat_fasta. The five per-class files are then moved to
        input_path as "<prefix>_allstrain_<class>.fa".
        '''
        all_type = os.path.join(self.tmp_folder, self.fastas["all"])
        all_no_orph = os.path.join(self.tmp_folder, self.fastas["all_no_orph"])
        # Remove results of an earlier run before writing new ones.
        if self.fastas["all"] in os.listdir(self.tmp_folder):
            os.remove(all_type)
        if self.fastas["all_no_orph"] in os.listdir(self.tmp_folder):
            os.remove(all_no_orph)
        shutil.copyfile(self.fastas["pri"], self.fastas["tmp_fa"])
        self.helper.merge_file(self.fastas["sec"], self.fastas["tmp_fa"])
        self.helper.merge_file(self.fastas["inter"], self.fastas["tmp_fa"])
        self.helper.merge_file(self.fastas["anti"], self.fastas["tmp_fa"])
        # tmp_all = everything collected so far plus the orphan class.
        shutil.copyfile(self.fastas["tmp_fa"], self.fastas["tmp_all"])
        self.helper.merge_file(self.fastas["orph"], self.fastas["tmp_all"])
        del_repeat_fasta(self.fastas["tmp_fa"], all_no_orph)
        del_repeat_fasta(self.fastas["tmp_all"], all_type)
        os.remove(self.fastas["tmp_fa"])
        os.remove(self.fastas["tmp_all"])
        out_prefix = os.path.join(input_path, prefix)
        shutil.move(self.fastas["pri"], "_".join([
            out_prefix, "allstrain_primary.fa"]))
        shutil.move(self.fastas["sec"], "_".join([
            out_prefix, "allstrain_secondary.fa"]))
        shutil.move(self.fastas["inter"], "_".join([
            out_prefix, "allstrain_internal.fa"]))
        shutil.move(self.fastas["anti"], "_".join([
            out_prefix, "allstrain_antisense.fa"]))
        shutil.move(self.fastas["orph"], "_".join([
            out_prefix, "allstrain_orphan.fa"]))
#.........这里部分代码省略.........
示例13: CircRNADetection
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import merge_file [as 别名]
#.........这里部分代码省略.........
for fasta in os.listdir(fastas):
headers = []
if (fasta.endswith(".fa") or fasta.endswith(".fna") or
fasta.endswith(".fasta")):
with open(os.path.join(fastas, fasta), "r") as f_h:
for line in f_h:
line = line.strip()
if line.startswith(">"):
headers.append(line[1:])
filename = fasta.split(".")
fasta_prefix = ".".join(filename[:-1])
tmp_prefixs.append(fasta_prefix)
self.helper.check_make_folder(os.path.join(
os.getcwd(), fasta_prefix))
for header in headers:
shutil.copyfile(os.path.join(splice_path, header,
self.splices["file"]),
os.path.join(fasta_prefix,
"_".join([self.splices["splice"],
header + ".bed"])))
shutil.copyfile(os.path.join(splice_path, header,
self.trans["file"]),
os.path.join(fasta_prefix,
"_".join([self.trans["trans"],
header + ".bed"])))
out_splice = os.path.join(fasta_prefix,
self.splices["all_file"])
out_trans = os.path.join(fasta_prefix,
self.trans["all_file"])
if len(headers) > 1:
for file_ in os.listdir(fasta_prefix):
if (self.splices["splice"] in file_) and (
self.splices["all"] not in file_):
self.helper.merge_file(os.path.join(
fasta_prefix, file_), out_splice)
elif (self.trans["trans"] in file_) and (
self.trans["all"] not in file_):
self.helper.merge_file(os.path.join(
fasta_prefix, file_), out_trans)
else:
shutil.move(os.path.join(
fasta_prefix,
"_".join([self.splices["splice"],
headers[0] + ".bed"])),
out_splice)
shutil.move(os.path.join(
fasta_prefix,
"_".join([self.trans["trans"],
headers[0] + ".bed"])),
out_trans)
self.helper.remove_all_content(splice_path, None, "dir")
return tmp_prefixs
def _stat_and_gen_gff(self, tmp_prefixs, args_circ):
for prefix in tmp_prefixs:
self.helper.check_make_folder(os.path.join(self.gff_folder,
prefix))
shutil.copytree(prefix, os.path.join(self.splice_path, prefix))
self.helper.check_make_folder(os.path.join(
self.candidate_path, prefix))
print("comparing with annotation of {0}".format(prefix))
if self.splices["all_file"] in os.listdir(os.path.join(
self.splice_path, prefix)):
detect_circrna(os.path.join(self.splice_path, prefix,
self.splices["all_file"]), os.path.join(
self.gff_path, prefix + ".gff"),
示例14: GoTermFinding
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import merge_file [as 别名]
class GoTermFinding(object):
    '''Retrieval of GO terms for CDSs and computation of their statistics.

    NOTE(review): the listing this class comes from cuts off the
    run_go_term method that follows _stat.
    '''

    def __init__(self, args_go):
        '''Create the helper objects and derive all output paths.

        args_go has to provide ``out_folder``, ``gffs`` and ``trans``
        (may be None).
        '''
        self.multiparser = Multiparser()
        self.helper = Helper()
        self.out_all = os.path.join(args_go.out_folder, "all_CDS")
        self.out_express = os.path.join(args_go.out_folder, "expressed_CDS")
        self.result_all_path = os.path.join(self.out_all, "Go_term_results")
        self.result_express_path = os.path.join(self.out_express,
                                                "Go_term_results")
        self.gff_path = os.path.join(args_go.gffs, "tmp")
        if args_go.trans is not None:
            self.tran_path = os.path.join(args_go.trans, "tmp")
        else:
            self.tran_path = None
        self.stat_all_path = os.path.join(self.out_all, "statistics")
        self.stat_express_path = os.path.join(self.out_express,
                                              "statistics")
        self.all_strain = "all_strains_uniprot.csv"

    def _retrieve_go(self, uniprot, out_path, type_):
        '''Run retrieve_uniprot for every entry of self.gff_path.

        The result of each entry is written to
        <out_path>/<prefix>/<prefix>_uniprot.csv, where prefix is the
        entry name without ".gff". When self.tran_path is set, the
        matching "<prefix>_transcript.gff" is passed along as well.
        '''
        for gff in os.listdir(self.gff_path):
            prefix = gff.replace(".gff", "")
            self.helper.check_make_folder(os.path.join(out_path, prefix))
            out_file = os.path.join(out_path, prefix,
                                    "_".join([prefix, "uniprot.csv"]))
            print("extracting Go terms of {0} from UniProt...".format(prefix))
            if self.tran_path is not None:
                tran_file = os.path.join(self.tran_path,
                                         "_".join([prefix, "transcript.gff"]))
            else:
                tran_file = None
            retrieve_uniprot(uniprot, os.path.join(self.gff_path, gff),
                             out_file, tran_file, type_)

    def _merge_files(self, gffs, out_path, out_folder):
        '''Collect the per-gff csv files of every "*gff_folder" in gffs.

        For each such folder a result folder is created in out_folder.
        With more than one gff file the single csv files are merged into
        self.all_strain and also copied next to it; with one gff file its
        csv is copied to self.all_strain. Finally out_path is emptied and
        the result folders are moved into it.
        '''
        folders = []
        for folder in os.listdir(gffs):
            if folder.endswith("gff_folder"):
                folder_prefix = folder.replace(".gff_folder", "")
                folder_path = os.path.join(out_folder, folder_prefix)
                self.helper.check_make_folder(folder_path)
                folders.append(folder_path)
                filenames = []
                for gff in os.listdir(os.path.join(gffs, folder)):
                    if gff.endswith(".gff"):
                        filenames.append(gff.replace(".gff", ""))
                out_all = os.path.join(folder_path, self.all_strain)
                if len(filenames) > 1:
                    if self.all_strain in os.listdir(folder_path):
                        os.remove(out_all)
                    for filename in filenames:
                        csv_file = "_".join([filename, "uniprot.csv"])
                        self.helper.merge_file(os.path.join(out_path,
                                               filename, csv_file), out_all)
                        shutil.copy(os.path.join(out_path, filename, csv_file),
                                    folder_path)
                else:
                    shutil.copyfile(os.path.join(out_path, filenames[0],
                                    "_".join([filenames[0], "uniprot.csv"])),
                                    out_all)
        self.helper.remove_all_content(out_path, None, "dir")
        self.helper.remove_all_content(out_path, None, "file")
        for folder in folders:
            # basename instead of split("/"): the paths were built with
            # os.path.join and need not contain "/" on every platform.
            folder_prefix = os.path.basename(folder)
            shutil.move(folder, os.path.join(out_path, folder_prefix))

    def _make_fig_folder(self, strain_stat_path):
        '''Return <strain_stat_path>/figs, creating it when it is missing.'''
        fig_path = os.path.join(strain_stat_path, "figs")
        # The old check looked for an entry named "fig" although the
        # folder is called "figs": an existing "figs" made os.mkdir fail
        # and a stray "fig" entry prevented the folder from being created.
        if not os.path.isdir(fig_path):
            os.mkdir(fig_path)
        return fig_path

    def _stat(self, out_path, stat_path, go, goslim, out_folder):
        '''Compute the GO statistics for every folder in out_path.

        For each folder map2goslim is run on its self.all_strain file;
        the table is written to <stat_path>/<folder>/stat_<folder>.csv
        and the four kinds of png figures are moved from out_folder into
        the "figs" sub-folder.
        '''
        for folder in os.listdir(out_path):
            strain_stat_path = os.path.join(stat_path, folder)
            self.helper.check_make_folder(strain_stat_path)
            fig_path = self._make_fig_folder(strain_stat_path)
            print("Computing statistics of {0}".format(folder))
            map2goslim(goslim, go,
                       os.path.join(out_path, folder, self.all_strain),
                       os.path.join(strain_stat_path,
                                    "_".join(["stat", folder + ".csv"])),
                       out_folder)
            self.helper.move_all_content(out_folder, fig_path,
                                         ["_three_roots.png"])
            self.helper.move_all_content(out_folder, fig_path,
                                         ["_molecular_function.png"])
            self.helper.move_all_content(out_folder, fig_path,
                                         ["_cellular_component.png"])
            self.helper.move_all_content(out_folder, fig_path,
                                         ["_biological_process.png"])
def run_go_term(self, args_go):
for gff in os.listdir(args_go.gffs):
if gff.endswith(".gff"):
self.helper.check_uni_attributes(os.path.join(
args_go.gffs, gff))
self.multiparser.parser_gff(args_go.gffs, None)
if args_go.trans is not None:
self.multiparser.parser_gff(args_go.trans, "transcript")
print("Computing all CDS...")
#.........这里部分代码省略.........
示例15: RATT
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import merge_file [as 别名]
#.........这里部分代码省略.........
if out:
out.write(line)
if line.startswith("//"):
out.close()
close = True
shutil.move(self.gbk_tmp,
os.path.join(self.gbk, filename))
if not close:
out.close()
return self.gbk
def _convert_embl(self, ref_embls):
    '''Convert the ".gbk" files found in ref_embls to EMBL format.

    All ".gbk" entries of the folder are passed to self._parser_embl_gbk;
    its result is converted with self.converter.convert_gbk2embl, the
    self.embl folder is (re)created and the ".embl" files are moved into
    it. Returns the value delivered by self._parser_embl_gbk. When the
    folder holds no ".gbk" entry an error is printed and the program
    exits.
    '''
    genbank_files = [os.path.join(ref_embls, entry)
                     for entry in os.listdir(ref_embls)
                     if entry.endswith(".gbk")]
    out_gbk = None
    if genbank_files:
        out_gbk = self._parser_embl_gbk(genbank_files)
        self.converter.convert_gbk2embl(out_gbk)
        self.helper.check_make_folder(self.embl)
        self.helper.move_all_content(out_gbk, self.embl, [".embl"])
    else:
        print("Error: please assign proper folder for Genebank file!!!")
        sys.exit()
    return out_gbk
def _run_ratt(self, args_ratt, tar, ref, out):
    '''Call RATT for one target/reference pair.

    The command consists of args_ratt.ratt_path, self.embl, the target
    fasta (<tar>.fa in self.tmp_files["tar"]), args_ratt.element,
    args_ratt.transfer_type and the reference fasta (<ref>.fa in
    self.tmp_files["ref"]). Standard output goes to ``out``; standard
    error is discarded.
    '''
    target_fasta = os.path.join(self.tmp_files["tar"], tar + ".fa")
    reference_fasta = os.path.join(self.tmp_files["ref"], ref + ".fa")
    command = [args_ratt.ratt_path, self.embl, target_fasta,
               args_ratt.element, args_ratt.transfer_type,
               reference_fasta]
    call(command, stdout=out, stderr=DEVNULL)
def _format_and_run(self, args_ratt):
    '''Run RATT for every "reference:target" pair and tidy up its output.

    args_ratt.pairs holds strings of the form "ref:tar". For each pair
    self._run_ratt is called with a freshly opened self.ratt_log handle.
    Afterwards the current working directory is scanned: entries whose
    name contains "final" are moved to args_ratt.output_path, entries
    whose name contains args_ratt.element, "query", "Reference", "Query"
    or "Sequences" are deleted.
    '''
    # NOTE(review): the listing lost its indentation; the clean-up scan
    # is assumed to run once per pair -- confirm against upstream.
    print("Running RATT...")
    leftovers = (args_ratt.element, "query", "Reference", "Query",
                 "Sequences")
    for pair in args_ratt.pairs:
        names = pair.split(":")
        ref = names[0]
        tar = names[1]
        # "w+" truncates the log for every pair (unchanged behaviour).
        # The with-block closes each handle; before, only the handle of
        # the last pair was closed and an empty pair list failed on the
        # undefined name ``out``.
        with open(self.ratt_log, "w+") as out:
            print(tar)
            self._run_ratt(args_ratt, tar, ref, out)
        for filename in os.listdir():
            if "final" in filename:
                shutil.move(filename, os.path.join(args_ratt.output_path,
                                                   filename))
            elif any(tag in filename for tag in leftovers):
                if os.path.isfile(filename):
                    os.remove(filename)
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
# annotation_transfer: run the RATT annotation transfer and, when
# args_ratt.convert is set, convert the resulting "final.embl" files and
# merge the converted gff/ptt/rnt files into one set per target fasta
# folder.
# NOTE(review): this listing lost its indentation, so the nesting of the
# statements below (in particular how much belongs to the
# ``args_ratt.convert`` branch) must be confirmed against the upstream
# source before the code is changed.
def annotation_transfer(self, args_ratt):
# Both fasta folders are handed to the multiparser (presumably this is
# what creates the "*_folder" sub-directories scanned further down --
# TODO confirm).
self.multiparser.parser_fasta(args_ratt.tar_fastas)
self.multiparser.parser_fasta(args_ratt.ref_fastas)
# The result of the conversion is later passed to _remove_files.
out_gbk = self._convert_embl(args_ratt.ref_embls)
self._format_and_run(args_ratt)
if args_ratt.convert:
files = []
# Convert every entry of the output folder whose name contains
# "final.embl".
for data in os.listdir(args_ratt.output_path):
if "final.embl" in data:
self._convert_to_gff(data, args_ratt, files)
self._convert_to_pttrnt(args_ratt.gff_outfolder, files)
self.helper.check_make_folder(self.tmp_files["out_gff"])
for folder in os.listdir(args_ratt.tar_fastas):
files = []
if "_folder" in folder:
datas = folder.split("_folder")
# Text before "_folder" without its last three characters (presumably
# the ".fa" extension -- TODO confirm).
prefix = datas[0][:-3]
for file_ in os.listdir(os.path.join(args_ratt.tar_fastas,
folder)):
# Drops the last three characters of the file name as well.
files.append(file_[:-3])
# Append every converted file whose name without its last four
# characters equals one of the collected names to the temporary
# gff / ptt / rnt file.
for gff in os.listdir(args_ratt.gff_outfolder):
for file_ in files:
if (".gff" in gff) and (file_ == gff[:-4]):
self.helper.merge_file(os.path.join(
args_ratt.gff_outfolder, gff),
self.tmp_files["gff"])
if (".ptt" in gff) and (file_ == gff[:-4]):
self.helper.merge_file(os.path.join(
args_ratt.gff_outfolder, gff),
self.tmp_files["ptt"])
if (".rnt" in gff) and (file_ == gff[:-4]):
self.helper.merge_file(os.path.join(
args_ratt.gff_outfolder, gff),
self.tmp_files["rnt"])
# The three temporary files are moved into the out_gff folder under
# ``prefix``; no check is made here that they exist.
shutil.move(self.tmp_files["gff"], os.path.join(
self.tmp_files["out_gff"], prefix + ".gff"))
shutil.move(self.tmp_files["ptt"], os.path.join(
self.tmp_files["out_gff"], prefix + ".ptt"))
shutil.move(self.tmp_files["rnt"], os.path.join(
self.tmp_files["out_gff"], prefix + ".rnt"))
self._remove_files(args_ratt, out_gbk)