本文整理汇总了Python中annogesiclib.multiparser.Multiparser.parser_fasta方法的典型用法代码示例。如果您正苦于以下问题：Python Multiparser.parser_fasta方法的具体用法？Python Multiparser.parser_fasta怎么用？Python Multiparser.parser_fasta使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类annogesiclib.multiparser.Multiparser的用法示例。
在下文中一共展示了Multiparser.parser_fasta方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: TargetFasta
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import parser_fasta [as 别名]
class TargetFasta(object):
    """Generate target FASTA files from reference FASTA files and a mutation table.

    Working data is kept in a ``tmp`` sub-folder of the target folder and of
    the reference folder; both are removed again by ``get_target_fasta``.
    """

    def __init__(self, tar_folder, ref_folder):
        self.multiparser = Multiparser()
        self.seq_editer = SeqEditer()
        self.helper = Helper()
        self.folders = {"tmp_tar": os.path.join(tar_folder, "tmp"),
                        "tmp_ref": os.path.join(ref_folder, "tmp")}

    def get_target_fasta(self, mut_table, tar_folder, ref_folder, output):
        """Create the target FASTA files inside *tar_folder*.

        Args:
            mut_table: mutation table handed to ``SeqEditer.modify_seq``.
            tar_folder: folder receiving the final ``.fa`` files.
            ref_folder: folder holding the reference FASTA files.
            output: ``None`` to move every generated ``.fa`` file into
                *tar_folder*, or an iterable of ``"<filename>:<strain>"``
                specifications in which several strains are joined with
                ``_and_``; the listed strains are concatenated into
                ``<filename>.fa``.

        Raises:
            IndexError: if an *output* entry does not contain ``":"``.
        """
        tmp_tar = self.folders["tmp_tar"]
        self.multiparser.parser_fasta(ref_folder)
        # Always start from an empty temporary target folder.
        if "tmp" in os.listdir(tar_folder):
            shutil.rmtree(tmp_tar)
        os.mkdir(tmp_tar)
        self.seq_editer.modify_seq(self.folders["tmp_ref"], mut_table,
                                   tmp_tar)
        print("transfer to target fasta...")
        if output is not None:
            # Nothing is added to or removed from the temporary folder while
            # merging, so one directory listing serves every strain.
            available = set(os.listdir(tmp_tar))
            for file_ in output:
                datas = file_.split(":")
                filename = datas[0]
                strains = datas[1].split("_and_")
                first = True
                # The context manager closes the merged file even when
                # reading one of the strain files fails.
                with open(os.path.join(tar_folder,
                                       filename + ".fa"), "w") as out:
                    for strain in strains:
                        fasta = strain + ".fa"
                        if fasta not in available:
                            print("Error:no fasta information of "
                                  "{0}.fa".format(strain))
                            continue
                        if first:
                            first = False
                        else:
                            # Separate consecutive strains with a newline.
                            out.write("\n")
                        with open(os.path.join(tmp_tar, fasta)) as f_h:
                            for line in f_h:
                                out.write(line)
        else:
            self.helper.move_all_content(tmp_tar, tar_folder, [".fa"])
        shutil.rmtree(tmp_tar)
        shutil.rmtree(self.folders["tmp_ref"])
        self.helper.remove_all_content(ref_folder, "_folder", "dir")
        print("please use the new fasta file to remapping again.")
        print("Then copy BAMs and wigs back to input/align_results/BAMs "
              "and input/align_results/wigs")
示例2: RATT
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import parser_fasta [as 别名]
#.........这里部分代码省略.........
if out:
out.write(line)
if line.startswith("//"):
out.close()
close = True
shutil.move(self.gbk_tmp,
os.path.join(self.gbk, filename))
if not close:
out.close()
return self.gbk
def _convert_embl(self, ref_embls):
detect_gbk = False
gbks = []
out_gbk = None
for embl in os.listdir(ref_embls):
if embl.endswith(".gbk"):
detect_gbk = True
gbks.append(os.path.join(ref_embls, embl))
if not detect_gbk:
print("Error: please assign proper folder for Genebank file!!!")
sys.exit()
elif detect_gbk:
out_gbk = self._parser_embl_gbk(gbks)
self.converter.convert_gbk2embl(out_gbk)
self.helper.check_make_folder(self.embl)
self.helper.move_all_content(out_gbk, self.embl, [".embl"])
return out_gbk
def _run_ratt(self, args_ratt, tar, ref, out):
    """Run RATT for one reference/target pair.

    Standard output of the process is written to *out*; standard error is
    discarded.
    """
    command = [
        args_ratt.ratt_path,
        self.embl,
        os.path.join(self.tmp_files["tar"], tar + ".fa"),
        args_ratt.element,
        args_ratt.transfer_type,
        os.path.join(self.tmp_files["ref"], ref + ".fa"),
    ]
    call(command, stdout=out, stderr=DEVNULL)
def _format_and_run(self, args_ratt):
print("Running RATT...")
for pair in args_ratt.pairs:
ref = pair.split(":")[0]
tar = pair.split(":")[1]
out = open(self.ratt_log, "w+")
print(tar)
self._run_ratt(args_ratt, tar, ref, out)
for filename in os.listdir():
if ("final" in filename):
shutil.move(filename, os.path.join(args_ratt.output_path,
filename))
elif (args_ratt.element in filename) or (
"query" in filename) or (
"Reference" in filename) or (
"Query" in filename) or (
"Sequences" in filename):
if os.path.isfile(filename):
os.remove(filename)
if os.path.isdir(filename):
shutil.rmtree(filename)
out.close()
# Entry point of the RATT annotation transfer: parses the target and
# reference FASTA folders, prepares the EMBL input (_convert_embl), runs
# RATT (_format_and_run) and, depending on args_ratt.convert, converts the
# "final.embl" results and merges them into one gff/ptt/rnt file per
# "*_folder" entry of args_ratt.tar_fastas.
# NOTE(review): the indentation of this excerpt was lost, so the nesting
# described in the comments below is the most plausible reading, not a
# certainty -- confirm against the upstream file before editing.
def annotation_transfer(self, args_ratt):
self.multiparser.parser_fasta(args_ratt.tar_fastas)
self.multiparser.parser_fasta(args_ratt.ref_fastas)
out_gbk = self._convert_embl(args_ratt.ref_embls)
self._format_and_run(args_ratt)
if args_ratt.convert:
files = []
# Convert every RATT result whose name contains "final.embl".
for data in os.listdir(args_ratt.output_path):
if "final.embl" in data:
self._convert_to_gff(data, args_ratt, files)
self._convert_to_pttrnt(args_ratt.gff_outfolder, files)
self.helper.check_make_folder(self.tmp_files["out_gff"])
# Presumably the "*_folder" directories are created by parser_fasta and
# hold one FASTA file per sequence -- TODO confirm.
for folder in os.listdir(args_ratt.tar_fastas):
files = []
if "_folder" in folder:
datas = folder.split("_folder")
# Drops the last three characters of the name (assumes a ".fa" suffix).
prefix = datas[0][:-3]
for file_ in os.listdir(os.path.join(args_ratt.tar_fastas,
folder)):
files.append(file_[:-3])
# Append each converted file whose stem matches one of the collected
# names to the temporary gff/ptt/rnt file.
for gff in os.listdir(args_ratt.gff_outfolder):
for file_ in files:
if (".gff" in gff) and (file_ == gff[:-4]):
self.helper.merge_file(os.path.join(
args_ratt.gff_outfolder, gff),
self.tmp_files["gff"])
if (".ptt" in gff) and (file_ == gff[:-4]):
self.helper.merge_file(os.path.join(
args_ratt.gff_outfolder, gff),
self.tmp_files["ptt"])
if (".rnt" in gff) and (file_ == gff[:-4]):
self.helper.merge_file(os.path.join(
args_ratt.gff_outfolder, gff),
self.tmp_files["rnt"])
# NOTE(review): these moves are not guarded by an existence check; if
# nothing was merged for this prefix the temporary files may be missing.
shutil.move(self.tmp_files["gff"], os.path.join(
self.tmp_files["out_gff"], prefix + ".gff"))
shutil.move(self.tmp_files["ptt"], os.path.join(
self.tmp_files["out_gff"], prefix + ".ptt"))
shutil.move(self.tmp_files["rnt"], os.path.join(
self.tmp_files["out_gff"], prefix + ".rnt"))
self._remove_files(args_ratt, out_gbk)
示例3: Ribos
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import parser_fasta [as 别名]
#.........这里部分代码省略.........
regenerate_seq(first_scan_file, first_seq,
first_table, sec_seq)
print("scanning of {0}".format(prefix))
sec_scan_file = self._run_infernal(args_ribo, sec_seq,
"re_txt", prefix)
sec_table = os.path.join(
self.tmp_files["table"],
"_".join([prefix, self.suffixs["re_csv"]]))
reextract_rbs(sec_scan_file, first_table, sec_table)
shutil.move(sec_table, first_table)
modify_table(first_table, args_ribo.output_all)
return prefixs
def _merge_results(self, args_ribo):
    """Merge the per-sequence riboswitch results of every annotation file.

    For each ``.gff`` file in ``args_ribo.gffs`` the tables of the
    sequences it lists (taken from ``self.tmp_files["table"]``) are merged
    into one table in ``self.table_folder``, and the matching scan outputs
    are copied to ``<self.scan_folder>/<prefix>``. Afterwards
    ``stat_and_covert2gff`` is called for the merged table.
    """
    for gff in os.listdir(args_ribo.gffs):
        if not gff.endswith(".gff"):
            continue
        prefix = gff.replace(".gff", "")
        print("Merge results of {0}".format(prefix))
        scan_dir = os.path.join(self.scan_folder, prefix)
        self.helper.check_make_folder(scan_dir)
        merged_table = os.path.join(
            self.table_folder, "_".join([prefix, self.suffixs["csv"]]))
        pre_strain = ""
        # The context manager closes the annotation file even when a
        # copy/merge step fails half-way through.
        with open(os.path.join(args_ribo.gffs, gff)) as fh:
            for entry in self.gff_parser.entries(fh):
                if entry.seq_id != pre_strain:
                    strain_table = os.path.join(
                        self.tmp_files["table"],
                        "_".join([entry.seq_id, self.suffixs["csv"]]))
                    if len(pre_strain) == 0:
                        # The first sequence starts the merged table.
                        shutil.copyfile(strain_table, merged_table)
                    else:
                        self.helper.merge_file(strain_table, merged_table)
                    for key in ("txt", "re_txt"):
                        shutil.copy(os.path.join(
                            self.tmp_files["scan"],
                            "_".join([entry.seq_id, self.suffixs[key]])),
                            scan_dir)
                pre_strain = entry.seq_id
        out_stat = os.path.join(
            self.stat_folder,
            "_".join(["stat", prefix, "riboswitch.txt"]))
        print("compute statistics of {0}".format(prefix))
        stat_and_covert2gff(
            merged_table, args_ribo.ribos_id,
            os.path.join(self.gff_outfolder,
                         "_".join([prefix, "riboswitch.gff"])),
            args_ribo.fuzzy, out_stat)
def _remove_tmp(self, args_ribo):
self.helper.remove_tmp(args_ribo.gffs)
self.helper.remove_tmp(args_ribo.fastas)
self.helper.remove_all_content(args_ribo.out_folder, "tmp", "dir")
def _remove_overlap(self, gff_path):
    """Call ``rbs_overlap`` for every ``.gff`` file in *gff_path*.

    Each annotation file is paired with the temporary table that shares
    its prefix (``<prefix>_<csv suffix>`` in ``self.tmp_files["table"]``).
    """
    for name in os.listdir(gff_path):
        if not name.endswith(".gff"):
            continue
        table_name = "_".join([name.replace(".gff", ""),
                               self.suffixs["csv"]])
        table = os.path.join(self.tmp_files["table"], table_name)
        rbs_overlap(table, os.path.join(gff_path, name))
def run_ribos(self, args_ribo):
    """Run the riboswitch detection workflow.

    Exits when ``args_ribo.fuzzy_rbs`` is larger than 6. Otherwise the
    input folders are parsed, the annotation files are checked, the Rfam
    data is extracted and compressed with ``cmpress``, the sequences are
    scanned, and the results are de-overlapped, merged and mapped before
    the temporary data is removed.
    """
    if args_ribo.fuzzy_rbs > 6:
        print("Error: --fuzzy_rbs should be equal or less than 6!!")
        sys.exit()
    parser = self.multiparser
    parser.parser_gff(args_ribo.gffs, None)
    parser.parser_fasta(args_ribo.fastas)
    parser.parser_gff(args_ribo.trans, "transcript")
    parser.parser_gff(args_ribo.tsss, "TSS")
    for name in os.listdir(args_ribo.gffs):
        if name.endswith(".gff"):
            self.helper.check_uni_attributes(
                os.path.join(args_ribo.gffs, name))
    rbs_from_rfam(args_ribo.ribos_id, args_ribo.rfam, self.ribos_rfam)
    print("compressing Rfam...")
    cmpress = os.path.join(args_ribo.infernal_path, "cmpress")
    call([cmpress, "-F", self.ribos_rfam])
    for key in ("fasta", "scan", "table"):
        self.helper.check_make_folder(self.tmp_files[key])
    self._scan_extract_rfam([], args_ribo)
    self._remove_overlap(self.gff_path)
    self._merge_results(args_ribo)
    mapping_ribos(self.table_folder, args_ribo.ribos_id)
    self._remove_tmp(args_ribo)
示例4: sRNATargetPrediction
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import parser_fasta [as 别名]
#.........这里部分代码省略.........
checks[pre].append(line)
print_ = True
else:
if (num != 1):
if (line not in checks[srna_info]):
checks[srna_info].append(line)
print_ = True
else:
if (print_):
if (pre_num != len(checks)):
out_tmp.write(srna_info + "\n")
out_tmp.write(checks[srna_info][-1] + "\n")
out_tmp.write(line + "\n")
else:
if (not tar):
out_tmp.write(checks[srna_info][-1] + "\n")
out_tmp.write(line + "\n")
pre_num = len(checks)
tar = True
pre = line
num = num + 1
return num, pre_num, print_, pre, tar, srna_info
def _remove_intarna(self, line, checks, tar, srna_info, seq, out_tmp):
if (line.startswith(".")) or (
line.startswith("(")) or (
line.startswith(")")):
seq = line.split(";")[0]
if (seq not in checks[tar][srna_info]):
checks[tar][srna_info].append(seq)
out_tmp.write(line + "\n")
else:
if (len(line.split(";")) >= 8):
tar = line.split(";")[0]
srna_info = line.split(";")[3]
seq = line.split(";")[7]
if (tar not in checks):
checks[tar] = {}
checks[tar][srna_info] = [seq]
out_tmp.write(line + "\n")
else:
if (srna_info not in checks[tar]):
checks[tar][srna_info] = [seq]
out_tmp.write(line + "\n")
return tar, srna_info, seq
def _remove_repeat(self, interact_file, type_):
checks = {}
seq = ""
pre = ""
srna_info = ""
num = 1
tar = False
pre_num = 0
print_ = False
out_tmp = open(interact_file + "tmp", "w")
with open(interact_file) as fh:
for line in fh:
line = line.strip()
if (type_ == "RNAplex"):
num, pre_num, print_, pre = self._remove_rnaplex(
line, num, pre_num, pre, checks, out_tmp, print_)
elif (type_ == "RNAup"):
num, pre_num, print_, pre, tar, srna_info = (
self._remove_rnaup(
line, pre, num, pre_num,
srna_info, checks, out_tmp, print_, tar))
elif (type_ == "IntaRNA"):
tar, srna_info, seq = self._remove_intarna(
line, checks, tar, srna_info, seq, out_tmp)
out_tmp.close()
shutil.move(interact_file + "tmp", interact_file)
# Entry point of the sRNA target prediction: checks and parses the
# annotation, FASTA and sRNA inputs, generates the sequences (_gen_seq),
# runs every program named in args_tar.program (RNAplex, RNAup, IntaRNA),
# merges the results and removes the temporary data.
# NOTE(review): the indentation of this excerpt was lost. In particular it
# cannot be told from here whether the "_target_" cleanup and its log
# message belong to the RNAplex branch or run unconditionally -- confirm
# against the upstream file before editing.
def run_srna_target_prediction(self, args_tar, log):
self._check_gff(args_tar.gffs)
self._check_gff(args_tar.srnas)
self.multiparser.parser_gff(args_tar.gffs, None)
self.multiparser.parser_fasta(args_tar.fastas)
self.multiparser.parser_gff(args_tar.srnas, "sRNA")
# prefixs is passed to _gen_seq and then reused by every later step;
# presumably _gen_seq fills it in place -- TODO confirm.
prefixs = []
self._gen_seq(prefixs, args_tar)
# args_tar.program is tested with "in", so it may name several programs.
if ("RNAplex" in args_tar.program):
self._rna_plex(prefixs, args_tar, log)
self.helper.remove_all_content(self.target_seq_path,
"_target_", "file")
log.write("The temporary files for running RNAplex are deleted.\n")
if ("RNAup" in args_tar.program):
self._rnaup(prefixs, args_tar, log)
if ("IntaRNA" in args_tar.program):
self._intarna(prefixs, args_tar, log)
self._merge_rnaplex_rnaup(prefixs, args_tar, log)
# Clean-up of temporary folders/files created during the run.
self.helper.remove_all_content(args_tar.out_folder,
self.tmps["tmp"], "dir")
self.helper.remove_all_content(args_tar.out_folder,
self.tmps["tmp"], "file")
self.helper.remove_tmp_dir(args_tar.gffs)
self.helper.remove_tmp_dir(args_tar.srnas)
self.helper.remove_tmp_dir(args_tar.fastas)
self.helper.remove_all_content(self.srna_seq_path, "tmp_", "file")
示例5: Terminator
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import parser_fasta [as 别名]
#.........这里部分代码省略.........
os.remove(os.path.join(self.terms["all"],
"_".join([prefix, self.suffixs["allgff"]])))
log.write("\t" + os.path.join(self.terms["all"],
"_".join([prefix, self.suffixs["gff"]])) + "\n")
log.write("\t" + os.path.join(self.terms["best"],
"_".join([prefix, self.suffixs["gff"]])) + "\n")
log.write("\t" + os.path.join(self.terms["express"],
"_".join([prefix, self.suffixs["gff"]])) + "\n")
log.write("\t" + os.path.join(self.terms["non"],
"_".join([prefix, self.suffixs["gff"]])) + "\n")
log.write("\t" + os.path.join(self.csvs["all"],
"_".join([prefix, self.suffixs["csv"]])) + "\n")
log.write("\t" + os.path.join(stat_path,
"_".join(["stat", prefix + ".csv"])) + "\n")
log.write("\t" + os.path.join(self.csvs["best"],
"_".join([prefix, self.suffixs["csv"]])) + "\n")
log.write("\t" + os.path.join(self.csvs["express"],
"_".join([prefix, self.suffixs["csv"]])) + "\n")
log.write("\t" + os.path.join(self.csvs["non"],
"_".join([prefix, self.suffixs["csv"]])) + "\n")
def _check_gff_file(self, folder):
for file_ in os.listdir(folder):
if file_.endswith(".gff"):
self.helper.check_uni_attributes(os.path.join(folder, file_))
def _compare_term_tran(self, args_term, prefixs, log):
    """Search the terminators that are associated with transcripts.

    The transcript files are combined first. ``compare_term_tran`` is then
    run for each candidate set and, for every prefix, the file
    ``stat_compare_transcript_terminator_<prefix>.csv`` in the statistics
    folder is renamed so that it carries the candidate type. The renamed
    paths are written to *log*.

    The incoming *prefixs* value is not used: the list is rebuilt from the
    ``*_transcript.gff`` files found in ``self.tran_path``.
    """
    self.multiparser.combine_gff(args_term.gffs, self.tran_path,
                                 None, "transcript")
    print("Comparing terminators with transcripts now")
    suffix = "_transcript.gff"
    prefixs = [name.replace(suffix, "")
               for name in os.listdir(self.tran_path)
               if name.endswith(suffix)]
    log.write("Running compare_tran_term.py for comparing transcripts "
              "and terminators.\n")
    log.write("The following files are generated:\n")
    stat_dir = os.path.join(args_term.out_folder, "statistics")
    candidate_types = ("best_candidates", "expressed_candidates",
                       "all_candidates")
    for type_ in candidate_types:
        compare_term_tran(self.tran_path,
                          os.path.join(self.outfolder["term"], type_),
                          args_term.fuzzy_up_ta, args_term.fuzzy_down_ta,
                          args_term.out_folder, "terminator",
                          self.outfolder["term"], args_term.trans)
        for prefix in prefixs:
            source = os.path.join(
                stat_dir,
                "stat_compare_transcript_terminator_" + prefix + ".csv")
            renamed = os.path.join(
                stat_dir,
                "_".join(["stat_compare_terminator_transcript", prefix,
                          type_ + ".csv"]))
            shutil.move(source, renamed)
            log.write("\t" + renamed + "\n")
def _re_table(self, args_term, prefixs, log):
    """Run ``reorganize_table`` on every terminator table.

    All files below ``<out_folder>/tables/<candidate type>`` are processed
    and their paths are written to *log*. *prefixs* is not used.
    """
    log.write("Running re_table.py to generate coverage information.\n")
    log.write("The following files are updated:\n")
    tables_root = os.path.join(args_term.out_folder, "tables")
    candidate_types = ["all_candidates", "best_candidates",
                       "expressed_candidates", "non_expressed_candidates"]
    for type_ in candidate_types:
        type_dir = os.path.join(tables_root, type_)
        for table in os.listdir(type_dir):
            term_table = os.path.join(type_dir, table)
            reorganize_table(args_term.libs, args_term.merge_wigs,
                             "Coverage_detail", term_table)
            log.write("\t" + term_table + "\n")
# Entry point of the terminator prediction: checks the annotation and
# transcript files, parses the FASTA folder, converts the annotations to
# ptt/rnt (_convert_gff2rntptt), runs TransTermHP, converts its output to
# gff, intersects the candidates with the coverage data, computes the
# statistics, compares terminators with transcripts, rebuilds the tables
# and removes the temporary files.
# NOTE(review): the indentation of this excerpt was lost; the number of
# statements that belong to the 'if check != "NO_CDS"' branch cannot be
# determined from here -- confirm against the upstream file before editing.
def run_terminator(self, args_term, log):
self._check_gff_file(args_term.gffs)
self._check_gff_file(args_term.trans)
self.multiparser.parser_fasta(args_term.fastas)
# NOTE(review): this emptiness check runs only after the folders have
# already been used above, so a missing folder probably fails earlier.
if (not args_term.gffs) or (not args_term.fastas):
print("Error: Please assign gff files "
"and fasta files!")
sys.exit()
file_types, prefixs = self._convert_gff2rntptt(
self.gff_path, self.fasta_path, args_term.srnas, log)
check = self._combine_ptt_rnt(self.gff_path, file_types,
self.srna_path)
self._run_TransTermHP(args_term, log)
self._convert_to_gff(prefixs, args_term, log)
self.helper.remove_tmp(self.gff_path)
self.multiparser.parser_gff(args_term.trans, "transcript")
self.helper.check_make_folder(self.tmps["term_table"])
# "NO_CDS" is a sentinel returned by _combine_ptt_rnt; presumably it means
# that no CDS annotation was available -- TODO confirm.
if check != "NO_CDS":
self.multiparser.parser_gff(self.tmps["transterm"],
self.tmps["hp"])
merge_path = self._merge_sRNA(args_term.srnas, prefixs, self.gff_path)
self._compute_intersection_forward_reverse(
prefixs, merge_path, args_term.wig_path,
args_term.merge_wigs, args_term, log)
self._compute_stat(args_term, log)
self._compare_term_tran(args_term, prefixs, log)
self._re_table(args_term, prefixs, log)
self._remove_tmp_file(args_term.merge_wigs, args_term)
示例6: Ribos
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import parser_fasta [as 别名]
#.........这里部分代码省略.........
log.write("Running ribo_gff.py to do statistics and generate "
"gff files for {0}.\n".format(prefix))
log.write("The following files are generated:\n")
out_gff = os.path.join(gff_outfolder, "_".join([
prefix, feature + ".gff"]))
stat_and_covert2gff(os.path.join(
table_folder, "_".join([prefix, suffixs["csv"]])),
feature_id, out_gff,
args_ribo.fuzzy, out_stat, feature)
log.write("\t" + out_gff + "\n")
log.write("\t" + out_stat + "\n")
fh.close()
def _remove_tmp(self, args_ribo):
self.helper.remove_tmp_dir(args_ribo.gffs)
self.helper.remove_tmp_dir(args_ribo.fastas)
self.helper.remove_tmp_dir(args_ribo.trans)
self.helper.remove_tmp_dir(args_ribo.tsss)
def _remove_overlap(self, gff_path, tmp_files, suffixs, type_, fuzzy, log):
    """Call ``rbs_overlap`` for every ``.gff`` file in *gff_path*.

    Each annotation file is paired with the temporary table sharing its
    prefix (``<prefix>_<csv suffix>`` in ``tmp_files["table"]``); the
    table path is reported to *log* afterwards.
    """
    log.write("Running rbs_overlap.py to remove the overlapping "
              "riboswitches/RNA thermometers.\n")
    for name in os.listdir(gff_path):
        if not name.endswith(".gff"):
            continue
        table_name = "_".join([name.replace(".gff", ""), suffixs["csv"]])
        tmp_table = os.path.join(tmp_files["table"], table_name)
        rbs_overlap(tmp_table, os.path.join(gff_path, name), type_, fuzzy)
        log.write("\t" + tmp_table + " is updated.\n")
def _core_prediction(self, args_ribo, feature_id, rfam, tmp_files,
                     table_folder, feature, scan_folder, suffixs,
                     stat_folder, gff_outfolder, out_folder, type_, log):
    """Main part of the detection for one feature class.

    Steps, each reported to *log*: extract the Rfam information
    (``rbs_from_rfam``), compress it with the ``cmpress`` executable,
    scan the sequences, remove overlaps, merge the results, map the Rfam
    details (``mapping_ribos``) and finally remove the ``tmp`` folders of
    *out_folder*.
    """
    log.write("Running get_Rfam_ribo.py to get the information of "
              "riboswitches/RNA thermometers from Rfam.\n")
    rbs_from_rfam(feature_id, args_ribo.rfam, rfam)
    log.write("Using Infernal to compress the Rfam data of "
              "riboswitches/RNA thermometers.\n")
    log.write("Please make sure the version of Infernal is at least 1.1.1.\n")
    print("Compressing Rfam of " + feature)
    cmpress_command = [args_ribo.cmpress_path, "-F", rfam]
    log.write(" ".join(cmpress_command) + "\n")
    call(cmpress_command)
    log.write("Done!\n")
    for key in ("fasta", "scan", "table"):
        self.helper.check_make_folder(tmp_files[key])
    self._scan_extract_rfam(
        [], args_ribo, tmp_files, suffixs, feature, rfam, log)
    self._remove_overlap(self.gff_path, tmp_files, suffixs, type_,
                         args_ribo.fuzzy, log)
    self._merge_results(args_ribo, scan_folder, suffixs, tmp_files,
                        table_folder, stat_folder, feature_id,
                        gff_outfolder, feature, log)
    log.write("Running map_ribos.py to extract all the details from Rfam.\n")
    mapping_ribos(table_folder, feature_id, feature)
    log.write("The following files are updated:\n")
    for table in os.listdir(table_folder):
        log.write("\t" + os.path.join(table_folder, table) + "\n")
    self.helper.remove_all_content(out_folder, "tmp", "dir")
def run_ribos(self, args_ribo, log_t, log_r):
    """Detect riboswitches and/or RNA thermometers.

    Exits when ``args_ribo.fuzzy_rbs`` is larger than 6 (the message is
    also written to whichever of *log_t* / *log_r* is not ``None``).
    ``args_ribo.program`` (case-insensitive) selects ``riboswitch``,
    ``thermometer`` or ``both``; *log_r* is used for the riboswitch run
    and *log_t* for the thermometer run.
    """
    if args_ribo.fuzzy_rbs > 6:
        for log in (log_t, log_r):
            if log is not None:
                log.write("--fuzzy_rbs should be equal or less than 6!\n")
        print("Error: --fuzzy_rbs should be equal or less than 6!")
        sys.exit()
    parser = self.multiparser
    parser.parser_gff(args_ribo.gffs, None)
    parser.parser_fasta(args_ribo.fastas)
    parser.parser_gff(args_ribo.trans, "transcript")
    if args_ribo.tsss is not None:
        parser.parser_gff(args_ribo.tsss, "TSS")
    for name in os.listdir(args_ribo.gffs):
        if name.endswith(".gff"):
            self.helper.check_uni_attributes(
                os.path.join(args_ribo.gffs, name))
    if args_ribo.program.lower() in ("both", "riboswitch"):
        print("Detecting riboswtiches now")
        self._core_prediction(
            args_ribo, args_ribo.ribos_id, self.ribos_rfam,
            self.ribos_tmp_files, self.ribos_table_folder,
            "riboswitch", self.ribos_scan_folder, self.ribos_suffixs,
            self.ribos_stat_folder, self.ribos_gff_outfolder,
            args_ribo.ribos_out_folder, "riboswitch", log_r)
    if args_ribo.program.lower() in ("both", "thermometer"):
        print("Detecting RNA thermometers now")
        self._core_prediction(
            args_ribo, args_ribo.thermo_id, self.thermo_rfam,
            self.thermo_tmp_files, self.thermo_table_folder,
            "RNA_thermometer", self.thermo_scan_folder,
            self.thermo_suffixs, self.thermo_stat_folder,
            self.thermo_gff_outfolder, args_ribo.thermo_out_folder,
            "thermometer", log_t)
    self._remove_tmp(args_ribo)
示例7: MEME
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import parser_fasta [as 别名]
#.........这里部分代码省略.........
for folder in os.listdir(os.path.join(output_folder,
strain, "GLAM2")):
csv_file = os.path.join(output_folder, strain,
"GLAM2", folder, "glam2.csv")
gen_promoter_table(os.path.join(output_folder, strain,
"GLAM2", folder, "glam2.txt"),
csv_file, tss_file, "glam2")
log.write("\t" + csv_file + "\n")
def _get_upstream(self, args_pro, prefix, tss, fasta):
'''get upstream sequence of TSS'''
if args_pro.source:
print("Generating fasta file of {0}".format(prefix))
upstream(os.path.join(self.tss_path, tss),
os.path.join(args_pro.fastas, fasta),
None, None, args_pro, prefix)
else:
if (args_pro.gffs is None):
print("Error: Please assign proper annotation!!!")
sys.exit()
if "TSS_classes" not in os.listdir(args_pro.output_folder):
os.mkdir(os.path.join(args_pro.output_folder, "TSS_classes"))
print("Classifying TSSs and extracting sequence of {0}".format(prefix))
upstream(os.path.join(self.tss_path, tss),
os.path.join(args_pro.fastas, fasta),
os.path.join(self.gff_path, prefix + ".gff"),
os.path.join(args_pro.output_folder, "TSS_classes",
"_".join([prefix, "TSS.gff"])), args_pro, prefix)
def _get_used_tss_type(self, args_pro):
input_fastas = []
for tss in args_pro.use_tss:
if int(tss) == 1:
input_fastas.append("all_types")
elif int(tss) == 2:
input_fastas.append("primary")
elif int(tss) == 3:
input_fastas.append("secondary")
elif int(tss) == 4:
input_fastas.append("internal")
elif int(tss) == 5:
input_fastas.append("antisense")
elif int(tss) == 6:
input_fastas.append("orphan")
elif int(tss) == 7:
input_fastas.append("without_orphan")
else:
print("Error: The assignment of --use_tss_typ is wrong!")
sys.exit()
return input_fastas
# Entry point of the promoter detection: removes stale "allfasta" data,
# parses the FASTA/TSS (and optional annotation) folders, extracts the
# upstream sequences of the TSSs for every prefix, optionally combines
# them, runs the motif program(s), generates the tables and removes the
# temporary files.
# NOTE(review): the indentation of this excerpt was lost; the extent of the
# "if args_pro.gffs is not None" and "if args_pro.combine" branches cannot
# be determined from here -- confirm against the upstream file before
# editing.
def run_meme(self, args_pro, log):
# Remove leftovers of a previous run before parsing the FASTA folder.
if "allfasta.fa" in os.listdir(args_pro.fastas):
os.remove(self.all_fasta)
if "allfasta.fa_folder" in os.listdir(args_pro.fastas):
shutil.rmtree(os.path.join(args_pro.fastas,
"allfasta.fa_folder"))
self.multiparser.parser_fasta(args_pro.fastas)
self.multiparser.parser_gff(args_pro.tsss, "TSS")
if "allfasta_TSS.gff" in os.listdir(self.tss_path):
os.remove(self.all_tss)
if args_pro.gffs is not None:
self._check_gff(args_pro.gffs)
self.multiparser.parser_gff(args_pro.gffs, None)
self.multiparser.combine_gff(args_pro.fastas, self.gff_path,
"fasta", None)
self._check_gff(args_pro.tsss)
self.multiparser.combine_gff(args_pro.fastas, self.tss_path,
"fasta", "TSS")
self.helper.check_make_folder(self.out_fasta)
self.helper.check_make_folder(self.tmp_folder)
prefixs = []
log.write("Running .TSS_upstream.py to extract the upstream "
"sequences of TSSs.\n")
log.write("The following files are generated:\n")
# One iteration per entry of self.tss_path; the prefix is the entry name
# with "_TSS.gff" removed.
for tss in os.listdir(self.tss_path):
prefix = tss.replace("_TSS.gff", "")
prefixs.append(prefix)
self.helper.check_make_folder(os.path.join(args_pro.output_folder,
prefix))
self.helper.check_make_folder(os.path.join(self.out_fasta,
prefix))
input_path = os.path.join(self.out_fasta, prefix)
fasta = self._get_fasta_file(args_pro.fastas, prefix)
self._get_upstream(args_pro, prefix, tss, fasta)
self._move_and_merge_fasta(input_path, prefix)
self._split_fasta_by_strain(input_path)
for file_ in os.listdir(input_path):
log.write("\t" + os.path.join(input_path, file_) + "\n")
if args_pro.combine:
self._combine_file(prefixs, args_pro)
for file_ in os.listdir(os.path.join(self.out_fasta, "allfasta")):
log.write("\t" + os.path.join(
self.out_fasta, "allfasta", file_) + "\n")
input_fastas = self._get_used_tss_type(args_pro)
self._run_program(prefixs, args_pro, log, input_fastas)
print("Generating the tables")
self._gen_table(args_pro.output_folder, prefixs,
args_pro.combine, args_pro.program, log)
self._remove_files(args_pro)
示例8: sRNATargetPrediction
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import parser_fasta [as 别名]
#.........这里部分代码省略.........
if "_".join([prefix, "RNAup.txt"]) in \
os.listdir(os.path.join(self.rnaup_path, prefix)):
if not args_tar.continue_rnaup:
os.remove(out_rnaup)
os.remove(out_log)
else:
srnas = self._get_continue(out_rnaup)
with open(os.path.join(self.srna_seq_path, "_".join([
self.tmps["tmp"], prefix, "sRNA.fa"])), "r") as s_f:
for line in s_f:
line = line.strip()
if line.startswith(">"):
if line[1:] in srnas:
start = False
continue
start = True
print("Running RNAup with {0}".format(line[1:]))
num_up += 1
out_up = open(os.path.join(args_tar.out_folder,
"".join([self.tmps["tmp"],
str(num_up), ".fa"])), "w")
out_up.write(line + "\n")
else:
if start:
out_up.write(line + "\n")
out_up.close()
self.helper.merge_file(os.path.join(
self.target_seq_path,
"_".join([prefix, "target.fa"])),
os.path.join(args_tar.out_folder,
"".join([self.tmps["tmp"],
str(num_up), ".fa"])))
if num_up == args_tar.core_up:
self._run_rnaup(num_up, processes,
out_rnaup, out_log, args_tar)
processes = []
num_up = 0
self._run_rnaup(num_up, processes, out_rnaup, out_log, args_tar)
def _merge_rnaplex_rnaup(self, prefixs, args_tar):
    """Call ``merge_srna_target`` for the RNAplex/RNAup results of every prefix.

    The input and ranking-output paths of a program stay ``None`` when
    ``args_tar.program`` neither names that program nor is ``"both"``.
    """
    for prefix in prefixs:
        merge_dir = os.path.join(self.merge_path, prefix)
        self.helper.check_make_folder(merge_dir)
        print("Ranking {0} now...".format(prefix))
        rnaplex_file = out_rnaplex = None
        rnaup_file = out_rnaup = None
        if args_tar.program in ("both", "RNAplex"):
            plex_dir = os.path.join(self.rnaplex_path, prefix)
            rnaplex_file = os.path.join(
                plex_dir, "_".join([prefix, "RNAplex.txt"]))
            out_rnaplex = os.path.join(
                plex_dir, "_".join([prefix, "RNAplex_rank.csv"]))
        if args_tar.program in ("both", "RNAup"):
            up_dir = os.path.join(self.rnaup_path, prefix)
            rnaup_file = os.path.join(
                up_dir, "_".join([prefix, "RNAup.txt"]))
            out_rnaup = os.path.join(
                up_dir, "_".join([prefix, "RNAup_rank.csv"]))
        merge_srna_target(
            rnaplex_file, rnaup_file, args_tar,
            out_rnaplex, out_rnaup,
            os.path.join(merge_dir, "_".join([prefix, "merge.csv"])),
            os.path.join(merge_dir, "_".join([prefix, "overlap.csv"])),
            os.path.join(self.srna_path, "_".join([prefix, "sRNA.gff"])),
            os.path.join(self.gff_path, prefix + ".gff"))
# Entry point of the sRNA target prediction (variant without logging):
# checks and parses the annotation, FASTA and sRNA inputs, generates the
# sequences (_gen_seq), runs RNAplex and/or RNAup depending on
# args_tar.program ("RNAplex", "RNAup" or "both"), merges the results and
# removes the temporary data.
# NOTE(review): the indentation of this excerpt was lost; whether the
# "_target_" cleanup belongs to the RNAplex branch cannot be determined
# from here -- confirm against the upstream file before editing.
def run_srna_target_prediction(self, args_tar):
self._check_gff(args_tar.gffs)
self._check_gff(args_tar.srnas)
self.multiparser.parser_gff(args_tar.gffs, None)
self.multiparser.parser_fasta(args_tar.fastas)
self.multiparser.parser_gff(args_tar.srnas, "sRNA")
# prefixs is passed to _gen_seq and then reused by every later step;
# presumably _gen_seq fills it in place -- TODO confirm.
prefixs = []
self._gen_seq(prefixs, args_tar)
if (args_tar.program == "both") or (
args_tar.program == "RNAplex"):
self._rna_plex(prefixs, args_tar)
self.helper.remove_all_content(self.target_seq_path,
"_target_", "file")
if (args_tar.program == "both") or (
args_tar.program == "RNAup"):
self._rnaup(prefixs, args_tar)
self._merge_rnaplex_rnaup(prefixs, args_tar)
# Remove the "RNAplfold" folder of every entry below <out_folder>/RNAplex.
if (args_tar.program == "RNAplex") or (
args_tar.program == "both"):
for strain in os.listdir(os.path.join(
args_tar.out_folder, "RNAplex")):
shutil.rmtree(os.path.join(args_tar.out_folder, "RNAplex",
strain, "RNAplfold"))
# Clean-up of temporary folders/files created during the run.
self.helper.remove_all_content(args_tar.out_folder,
self.tmps["tmp"], "dir")
self.helper.remove_all_content(args_tar.out_folder,
self.tmps["tmp"], "file")
self.helper.remove_tmp(args_tar.gffs)
self.helper.remove_tmp(args_tar.srnas)
self.helper.remove_tmp(args_tar.fastas)
self.helper.remove_all_content(self.srna_seq_path, "tmp_", "file")
示例9: RATT
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import parser_fasta [as 别名]
#.........这里部分代码省略.........
"genbank or embl files!")
log.write("The strain names in --compare_pair should be the same "
"as the strain names in fasta, genbank, or embl files.\n")
sys.exit()
log.write("Make sure your RATT version is at least 1.64.\n")
log.write("If the RATT can not run properly, please check the "
"RATT_HOME and PAGIT_HOME is assigned correctly.\n")
log.write(" ".join([args_ratt.ratt_path, self.embl,
os.path.join(self.tmp_files["tar"], tar + ".fa"),
args_ratt.element, args_ratt.transfer_type,
os.path.join(self.tmp_files["ref"], ref + ".fa")]) + "\n")
call([args_ratt.ratt_path, self.embl,
os.path.join(self.tmp_files["tar"], tar + ".fa"),
args_ratt.element, args_ratt.transfer_type,
os.path.join(self.tmp_files["ref"], ref + ".fa")],
stdout=out, stderr=DEVNULL)
log.write("Done!\n")
def _format_and_run(self, args_ratt, log):
print("Running RATT")
for pair in args_ratt.pairs:
ref = pair.split(":")[0]
tar = pair.split(":")[1]
out = open(self.ratt_log, "w+")
self._run_ratt(args_ratt, tar, ref, out, log)
log.write("The following files are generatd:\n")
for filename in os.listdir():
if ("final" in filename):
log.write("\t" + filename + "\n")
shutil.move(filename, os.path.join(args_ratt.output_path,
filename))
elif (args_ratt.element in filename) or (
"query" in filename) or (
"Reference" in filename) or (
"Query" in filename) or (
"Sequences" in filename):
log.write("\t" + filename + "\n")
if os.path.isfile(filename):
os.remove(filename)
if os.path.isdir(filename):
shutil.rmtree(filename)
out.close()
# Entry point of the RATT annotation transfer (variant with logging):
# parses the target and reference FASTA folders, prepares the EMBL input
# when no EMBL folder was given, runs RATT (_format_and_run), converts the
# "final.embl" results to gff/ptt/rnt and merges them into one file set per
# "*_folder" entry of args_ratt.tar_fastas.
# NOTE(review): the indentation of this excerpt was lost, so the nesting
# described in the comments below is the most plausible reading, not a
# certainty -- confirm against the upstream file before editing.
def annotation_transfer(self, args_ratt, log):
self.multiparser.parser_fasta(args_ratt.tar_fastas)
self.multiparser.parser_fasta(args_ratt.ref_fastas)
out_gbk = None
# NOTE(review): "ref_gbki" looks like it could be a misspelling of a
# Genbank-folder attribute -- verify against the argument container.
if args_ratt.ref_embls is None:
out_gbk = self._convert_embl(args_ratt.ref_gbki, log)
self._format_and_run(args_ratt, log)
files = []
# Convert every RATT result whose name contains "final.embl".
for data in os.listdir(args_ratt.output_path):
if "final.embl" in data:
log.write("Running converter.py to convert embl "
"files in {0} to gff, ptt, and rnt format.\n".format(data))
self._convert_to_gff(data, args_ratt, files, log)
self._convert_to_pttrnt(args_ratt.gff_outfolder, files, log)
self.helper.check_make_folder(self.tmp_files["out_gff"])
# NOTE(review): "data" is the loop variable of the loop above; if this
# statement sits outside that loop it relies on the last iterated name.
log.write("Merging the output of {0}.\n".format(data))
# Presumably the "*_folder" directories are created by parser_fasta and
# hold one FASTA file per sequence -- TODO confirm.
for folder in os.listdir(args_ratt.tar_fastas):
files = []
if "_folder" in folder:
datas = folder.split("_folder")
# The prefix is the folder name without its last dot-separated suffix.
prefix = ".".join(datas[0].split(".")[:-1])
for file_ in os.listdir(os.path.join(args_ratt.tar_fastas,
folder)):
files.append(file_[:-3])
# Append each converted file whose stem matches one of the collected
# names to the temporary gff/ptt/rnt file.
for gff in os.listdir(args_ratt.gff_outfolder):
for file_ in files:
if (".gff" in gff) and (file_ == gff[:-4]):
self.helper.merge_file(os.path.join(
args_ratt.gff_outfolder, gff),
self.tmp_files["gff"])
if (".ptt" in gff) and (file_ == gff[:-4]):
self.helper.merge_file(os.path.join(
args_ratt.gff_outfolder, gff),
self.tmp_files["ptt"])
if (".rnt" in gff) and (file_ == gff[:-4]):
self.helper.merge_file(os.path.join(
args_ratt.gff_outfolder, gff),
self.tmp_files["rnt"])
# Only the existence of the temporary gff file is checked before all three
# temporary files are moved; otherwise an error is printed and logged.
if os.path.exists(self.tmp_files["gff"]):
shutil.move(self.tmp_files["gff"], os.path.join(
self.tmp_files["out_gff"], prefix + ".gff"))
shutil.move(self.tmp_files["ptt"], os.path.join(
self.tmp_files["out_gff"], prefix + ".ptt"))
shutil.move(self.tmp_files["rnt"], os.path.join(
self.tmp_files["out_gff"], prefix + ".rnt"))
else:
print("Error: Please check your fasta or "
"annotation files, they should only contain "
"the query genome. And make sure your RATT can "
"work properly (check $ANNOgesic/output/"
"annotation_transfer/ratt_log.txt).")
log.write("Please check your fasta or "
"annotation files, they should only contain "
"the query genome. And make sure your RATT can "
"work properly (check $ANNOgesic/output/"
"annotation_transfer/ratt_log.txt).\n")
self._remove_files(args_ratt, out_gbk, log)
示例10: CircRNADetection
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import parser_fasta [as 别名]
#.........这里部分代码省略.........
for prefix in tmp_prefixs:
self.helper.check_make_folder(os.path.join(self.gff_folder,
prefix))
shutil.copytree(prefix, os.path.join(self.splice_path, prefix))
self.helper.check_make_folder(os.path.join(
self.candidate_path, prefix))
print("comparing with annotation of {0}".format(prefix))
if self.splices["all_file"] in os.listdir(os.path.join(
self.splice_path, prefix)):
detect_circrna(os.path.join(self.splice_path, prefix,
self.splices["all_file"]), os.path.join(
self.gff_path, prefix + ".gff"),
os.path.join(self.candidate_path, prefix,
"_".join(["circRNA", prefix + "_all.csv"])),
args_circ, os.path.join(args_circ.stat_folder,
"_".join(["stat_circRNA", prefix + ".csv"])))
self.converter.convert_circ2gff(
os.path.join(self.candidate_path, prefix,
"_".join(["circRNA",
prefix + "_all.csv"])),
args_circ, os.path.join(
self.gff_folder, prefix,
"_".join([prefix, "circRNA_all.gff"])),
os.path.join(self.gff_folder, prefix,
"_".join([prefix, "circRNA_best.gff"])))
def _assign_merge_bam(self, args_circ):
    '''Pick the folder whose BAM files get merged and list those files.

    When both ``args_circ.normal_bams`` and ``args_circ.frag_bams`` are
    set, every ``*.bam`` of the fragmented folder is copied into the
    normal folder, which becomes the merge folder.  When only one of them
    is set, that folder is used as it is.  The process exits when neither
    is set.

    Returns ``(merge_folder, remove_frags, bam_files)``: the chosen
    folder, the file names that were copied into it, and the paths of all
    ``*.bam`` files the chosen folder contains afterwards.
    '''
    normal_dir = args_circ.normal_bams
    frag_dir = args_circ.frag_bams
    if (normal_dir is None) and (frag_dir is None):
        print("Error: please assign bam folder or do alignment!!")
        sys.exit()
    copied = []
    if normal_dir is None:
        merge_folder = frag_dir
    else:
        merge_folder = normal_dir
        if frag_dir is not None:
            for name in os.listdir(frag_dir):
                if not name.endswith(".bam"):
                    continue
                shutil.copyfile(os.path.join(frag_dir, name),
                                os.path.join(normal_dir, name))
                copied.append(name)
    bam_paths = [os.path.join(merge_folder, name)
                 for name in os.listdir(merge_folder)
                 if name.endswith(".bam")]
    return merge_folder, copied, bam_paths
def run_circrna(self, args_circ):
    '''Run the circular RNA workflow from the input check to the cleanup.

    Steps, in order: every ``*.gff`` in ``args_circ.gffs`` is checked with
    ``self.helper.check_uni_attributes``; annotation and sequence folders
    are handed to ``self.multiparser``; for every genome prefix the
    alignment files are merged/sorted and ``self._run_testrealign`` is
    called; the splice results are merged, statistics and gff output are
    generated, and the temporary data are removed.

    ``args_circ.align`` selects between aligning the reads first
    (``self._align``) and reusing the BAM folder chosen by
    ``self._assign_merge_bam``.  The process exits when
    ``args_circ.segemehl_path`` is not set.
    '''
    for name in os.listdir(args_circ.gffs):
        if not name.endswith(".gff"):
            continue
        self.helper.check_uni_attributes(os.path.join(args_circ.gffs, name))
    if args_circ.segemehl_path is None:
        print("Error: please assign segemehl folder!!")
        sys.exit()
    self.multiparser.parser_gff(args_circ.gffs, None)
    self.multiparser.combine_gff(args_circ.fastas, self.gff_path,
                                 "fasta", None)
    # Both modes start by passing the fasta folder to the multiparser.
    self.multiparser.parser_fasta(args_circ.fastas)
    tmp_reads = []
    if args_circ.align:
        tmp_reads = self._deal_zip_file(args_circ.read_folder)
        align_files, prefixs = self._align(args_circ)
    else:
        prefixs = [fasta.replace(".fa", "")
                   for fasta in os.listdir(self.fasta_path)]
        merge_folder, remove_frag, bam_files = self._assign_merge_bam(
            args_circ)
        align_files = None
    for prefix in prefixs:
        if args_circ.align:
            sub_alignment_path = os.path.join(self.alignment_path, prefix)
            bam_files, convert_ones, remove_ones = self._convert_sam2bam(
                sub_alignment_path, args_circ.samtools_path, align_files)
        else:
            sub_alignment_path = merge_folder
            convert_ones, remove_ones = [], []
        self._merge_sort_aligment_file(
            bam_files, args_circ.samtools_path, sub_alignment_path,
            convert_ones, tmp_reads, remove_ones)
        self._run_testrealign(prefix, args_circ.segemehl_path,
                              sub_alignment_path)
    tmp_prefixs = self._merge_bed(args_circ.fastas, self.splice_path)
    self.multiparser.parser_gff(args_circ.gffs, None)
    self.multiparser.combine_gff(args_circ.fastas, self.gff_path,
                                 "fasta", None)
    self._stat_and_gen_gff(tmp_prefixs, args_circ)
    for folder in (args_circ.fastas, args_circ.gffs):
        self.helper.remove_tmp(folder)
    for tmp_prefix in tmp_prefixs:
        shutil.rmtree(tmp_prefix)
    if not args_circ.align:
        # Delete the files reported as copied by _assign_merge_bam again.
        for frag in remove_frag:
            os.remove(os.path.join(merge_folder, frag))
示例11: MEME
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import parser_fasta [as 别名]
#.........这里部分代码省略.........
fasta.endswith(".fasta")):
self.helper.merge_file(os.path.join(
args_pro.fastas, fasta), self.all_fasta)
print("generating fasta file of all fasta files")
prefixs.append("allfasta")
input_path = os.path.join(self.out_fasta, "allfasta")
self.helper.check_make_folder(os.path.join(
args_pro.output_folder, "allfasta"))
self.helper.check_make_folder(os.path.join(
self.out_fasta, "allfasta"))
args_pro.source = True
upstream(self.all_tss, self.all_fasta, None,
None, args_pro)
self._move_and_merge_fasta(input_path, "allfasta")
def _remove_files(self, args_pro):
    '''Clean up the temporary data left behind by a run.

    ``self.helper.remove_tmp`` is called for the fasta, TSS, gff and wig
    folders, ``self.all_fasta`` is deleted when the fasta folder lists
    ``allfasta.fa``, and the ``allfasta`` (when present) and ``tmp``
    folders of the current working directory are removed.
    '''
    input_folders = (args_pro.fastas, args_pro.tsss,
                     args_pro.gffs, args_pro.wigs)
    for folder in input_folders:
        self.helper.remove_tmp(folder)
    fasta_entries = os.listdir(args_pro.fastas)
    if "allfasta.fa" in fasta_entries:
        os.remove(self.all_fasta)
    cwd_entries = os.listdir(os.getcwd())
    if "allfasta" in cwd_entries:
        shutil.rmtree("allfasta")
    shutil.rmtree("tmp")
def _gen_table(self, output_folder, prefixs, combine):
    '''Call ``gen_promoter_table`` for every result folder of every strain.

    For each strain in ``prefixs`` (plus ``"allfasta"`` when ``combine``
    is true) and each entry of ``<output_folder>/<strain>``, the function
    receives the entry's ``meme.txt`` and ``meme.csv`` paths together with
    ``<self.tss_path>/<strain>_TSS.gff``.
    '''
    strains = (prefixs + ["allfasta"]) if combine else prefixs
    for strain in strains:
        strain_dir = os.path.join(output_folder, strain)
        for folder in os.listdir(strain_dir):
            tss_file = os.path.join(self.tss_path, strain + "_TSS.gff")
            result_dir = os.path.join(strain_dir, folder)
            gen_promoter_table(os.path.join(result_dir, "meme.txt"),
                               os.path.join(result_dir, "meme.csv"),
                               tss_file)
def _get_upstream(self, args_pro, prefix, tss, fasta):
    '''Call ``upstream`` for the TSS file and fasta file of one genome.

    When ``args_pro.source`` is true, ``upstream`` gets ``None`` for its
    third and fourth arguments.  Otherwise ``args_pro.gffs``,
    ``args_pro.wigs`` and ``args_pro.input_libs`` must all be set (the
    process exits if one is missing), the ``TSS_class`` folder is created
    inside the output folder when it is not listed there, and ``upstream``
    additionally receives ``<self.gff_path>/<prefix>.gff`` and
    ``TSS_class/<prefix>_TSS.gff``.
    '''
    if args_pro.source:
        print("generating fasta file of {0}".format(prefix))
        upstream(os.path.join(self.tss_path, tss),
                 os.path.join(args_pro.fastas, fasta),
                 None, None, args_pro)
        return
    required = (args_pro.gffs, args_pro.wigs, args_pro.input_libs)
    if any(item is None for item in required):
        print("Error:please assign proper annotation, tex +/- "
              "wig folder and tex treated libs!!!")
        sys.exit()
    class_folder = os.path.join(args_pro.output_folder, "TSS_class")
    if "TSS_class" not in os.listdir(args_pro.output_folder):
        os.mkdir(class_folder)
    print("classifying TSS and extracting fasta {0}".format(prefix))
    upstream(os.path.join(self.tss_path, tss),
             os.path.join(args_pro.fastas, fasta),
             os.path.join(self.gff_path, prefix + ".gff"),
             os.path.join(class_folder, "_".join([prefix, "TSS.gff"])),
             args_pro)
def run_meme(self, args_pro):
# Entry point of the promoter workflow.  In order it: removes leftovers of
# a previous merged fasta, hands the fasta, TSS and (optional) annotation
# folders to self.multiparser, prepares the output folders, calls
# self._get_upstream for the TSS files listed in self.tss_path, optionally
# calls self._combine_file, then self._run_program, self._gen_table and
# self._remove_files.
# NOTE(review): indentation was lost in this listing, so which statements
# belong to which `if`/`for` (e.g. whether the "allfasta.fa_folder" check
# depends on the "allfasta.fa" check) cannot be read off here -- confirm
# the nesting against the upstream source before editing.
# Drop the merged fasta (and its folder) left over from an earlier run.
if "allfasta.fa" in os.listdir(args_pro.fastas):
os.remove(self.all_fasta)
if "allfasta.fa_folder" in os.listdir(args_pro.fastas):
shutil.rmtree(os.path.join(args_pro.fastas,
"allfasta.fa_folder"))
self.multiparser.parser_fasta(args_pro.fastas)
self.multiparser.parser_gff(args_pro.tsss, "TSS")
# Drop a merged TSS file left over from an earlier run.
if "allfasta_TSS.gff" in os.listdir(self.tss_path):
os.remove(self.all_tss)
# Annotation files are optional; they are checked and parsed only if given.
if args_pro.gffs is not None:
self._check_gff(args_pro.gffs)
self.multiparser.parser_gff(args_pro.gffs, None)
self.multiparser.combine_gff(args_pro.fastas, self.gff_path,
"fasta", None)
self._check_gff(args_pro.tsss)
self.multiparser.combine_gff(args_pro.fastas, self.tss_path,
"fasta", "TSS")
self.helper.check_make_folder(self.out_fasta)
self.helper.check_make_folder(self.tmp_folder)
# The genome prefix is the TSS file name without its "_TSS.gff" suffix.
prefixs = []
for tss in os.listdir(self.tss_path):
prefix = tss.replace("_TSS.gff", "")
prefixs.append(prefix)
self.helper.check_make_folder(os.path.join(args_pro.output_folder,
prefix))
self.helper.check_make_folder(os.path.join(self.out_fasta,
prefix))
input_path = os.path.join(self.out_fasta, prefix)
fasta = self._get_fasta_file(args_pro.fastas, prefix)
self._get_upstream(args_pro, prefix, tss, fasta)
self._move_and_merge_fasta(input_path, prefix)
self._split_fasta_by_strain(input_path)
if args_pro.combine:
self._combine_file(prefixs, args_pro)
self._run_program(prefixs, args_pro)
print("generating the table...")
self._gen_table(args_pro.output_folder, prefixs, args_pro.combine)
self._remove_files(args_pro)
示例12: TSSpredator
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import parser_fasta [as 别名]
#.........这里部分代码省略.........
if tss.endswith("_processing.gff"):
ref = self.helper.get_correct_file(
args_tss.overlap_gffs, "_TSS.gff",
tss.replace("_processing.gff", ""), None, None)
filter_tss_pro(os.path.join(out_folder, tss),
ref, args_tss.program,
args_tss.cluster)
def _low_expression(self, args_tss, gff_folder):
    '''deal with the low expressed TSS

    The wiggle files are merged first (``self._merge_wigs``).  Then, for
    every file in ``gff_folder`` that matches the selected program
    (``*_TSS.gff`` for "tss", ``*_processing.gff`` for "processing"),
    ``filter_low_expression`` is called with the merged wiggle files in
    ``tmp/``, the returned cutoff is written to
    ``<self.stat_outfolder>/<prefix>/stat_<prefix>_low_expression_cutoff.csv``
    and the gff file is replaced by ``tmp/without_low_expression.gff``
    (presumably written by ``filter_low_expression`` -- confirm there).
    '''
    self._merge_wigs(args_tss.wig_folder, "wig", args_tss.libs)
    program = args_tss.program.lower()
    for gff in os.listdir(gff_folder):
        prefix = None
        if (program == "tss") and gff.endswith("_TSS.gff"):
            prefix = gff.replace("_TSS.gff", "")
        elif (program == "processing") and gff.endswith("_processing.gff"):
            prefix = gff.replace("_processing.gff", "")
        if not prefix:
            continue
        gff_file = os.path.join(gff_folder, gff)
        stat_file = os.path.join(
            self.stat_outfolder, prefix,
            "_".join(["stat", prefix, "low_expression_cutoff.csv"]))
        # The context manager closes every table, also when the filter
        # raises, and nothing is closed when no file matched at all.
        with open(stat_file, "w") as out:
            out.write("\t".join(["Genome", "Cutoff_coverage"]) + "\n")
            cutoff = filter_low_expression(
                gff_file, args_tss,
                "tmp/merge_forward.wig", "tmp/merge_reverse.wig",
                "tmp/without_low_expression.gff")
            out.write("\t".join([prefix, str(cutoff)]) + "\n")
        os.remove(gff_file)
        shutil.move("tmp/without_low_expression.gff", gff_file)
def run_tsspredator(self, args_tss, log):
# Entry point of the TSS / processing-site workflow.  In order it: checks
# and parses the inputs, calls self._start_to_run once per genome prefix,
# calls self._convert_gff, and then runs the optional follow-up steps
# (orphan check, low-expression filter, manual merge, overlap handling,
# statistics, validation, comparison) before self._remove_files.
# `log` must provide write(); a note is written before several steps.
# NOTE(review): indentation was lost in this listing, so which statements
# belong to which `for`/`if` cannot be read off here -- confirm the
# nesting against the upstream source before editing.
input_folder = os.path.join(args_tss.out_folder, "configs")
# Check every *.gff annotation file before anything is parsed.
for gff in os.listdir(args_tss.gffs):
if gff.endswith(".gff"):
self.helper.check_uni_attributes(os.path.join(
args_tss.gffs, gff))
self.helper.check_make_folder(self.gff_outfolder)
self.multiparser.parser_fasta(args_tss.fastas)
self.multiparser.parser_gff(args_tss.gffs, None)
self.multiparser.parser_wig(args_tss.wig_folder)
# _set_gen_config returns the prefixes that are iterated below.
prefixs = self._set_gen_config(args_tss, input_folder, log)
for prefix in prefixs:
out_path = os.path.join(
self.master, "_".join(["MasterTable", prefix]))
config_file = os.path.join(
input_folder, "_".join(["config", prefix]) + ".ini")
self._start_to_run(args_tss.tsspredator_path, config_file,
out_path, prefix, log)
# Move the statistics table, when one exists, into the statistics folder.
if os.path.exists(os.path.join(out_path, "TSSstatistics.tsv")):
shutil.move(os.path.join(out_path, "TSSstatistics.tsv"),
os.path.join(
self.stat_outfolder, "TSSstatistics.tsv"))
# "ps" is rewritten to "processing" on the argument object itself.
if args_tss.program.lower() == "ps":
args_tss.program = "processing"
self._convert_gff(prefixs, args_tss, log)
if args_tss.check_orphan:
print("checking the orphan TSSs")
log.write("Running check_orphan.py to re-check orphan TSSs.\n")
self._check_orphan(prefixs,
os.path.join(args_tss.wig_folder, "tmp"),
args_tss)
self.multiparser.combine_gff(args_tss.gffs, self.gff_outfolder,
None, args_tss.program)
# Collect the output names with the "_<program>.gff" suffix removed and
# make one statistics folder for each of them.
datas = []
for gff in os.listdir(self.gff_outfolder):
if gff.endswith(".gff"):
gff_folder = gff.replace("".join(["_", args_tss.program,
".gff"]), "")
self.helper.check_make_folder(
os.path.join(self.stat_outfolder, gff_folder))
datas.append(gff_folder)
if args_tss.remove_low_expression is not None:
log.write("Running filter_low_expression.py to filter out "
"low expressed TSS/PS.\n")
self._low_expression(args_tss, self.gff_outfolder)
if args_tss.manual is not None:
self.multiparser.parser_gff(args_tss.manual, None)
self.multiparser.combine_gff(args_tss.gffs, self.manual_path,
None, None)
self.multiparser.combine_fasta(args_tss.gffs, self.fasta_path,
None)
self.multiparser.combine_wig(args_tss.gffs, self.wig_path,
None, args_tss.libs)
log.write("Running merge_manual.py to merge the manual TSSs.\n")
self._merge_manual(datas, args_tss)
log.write("Running filter_TSS_pro.py to deal with the overlap "
"position between TSS and PS.\n")
self._deal_with_overlap(self.gff_outfolder, args_tss)
log.write("Running stat_TSSpredator.py to do statistics.\n")
self._stat_tss(datas, args_tss.program, log)
if args_tss.validate:
self._validate(datas, args_tss, log)
if args_tss.ta_files is not None:
self._compare_ta(datas, args_tss, log)
self._remove_files(args_tss)
示例13: TargetFasta
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import parser_fasta [as 别名]
class TargetFasta(object):
    '''Build updated ("target") fasta files from reference sequences.

    The reference fasta files are edited by ``self.seq_editer.modify_seq``
    according to a mutation table, and the edited sequences are gathered
    into ``<out_folder>/fasta_files/<out_name>.fa``.
    '''

    def __init__(self, tar_folder, ref_folder):
        '''Create the helper objects and remember the temporary target folder.

        ``ref_folder`` is not used here; it is kept so that existing
        callers keep working.
        '''
        self.multiparser = Multiparser()
        self.seq_editer = SeqEditer()
        self.helper = Helper()
        self.folders = {"tmp_tar": os.path.join(tar_folder, "tmp")}

    def gen_folder(self, out_folder, ref_files):
        '''Prepare the working folders and return the reference copy folder.

        The files in ``ref_files`` are copied to
        ``<out_folder>/tmp_reference`` and handed to
        ``self.multiparser.parser_fasta``; ``<out_folder>/fasta_files``
        and the temporary target folder are (re)created empty.
        '''
        new_ref_folder = os.path.join(out_folder, "tmp_reference")
        self.helper.check_make_folder(new_ref_folder)
        for file_ in ref_files:
            shutil.copy(file_, new_ref_folder)
        self.folders["tmp_ref"] = os.path.join(new_ref_folder, "tmp")
        self.multiparser.parser_fasta(new_ref_folder)
        fasta_folder = os.path.join(out_folder, "fasta_files")
        if os.path.exists(fasta_folder):
            shutil.rmtree(fasta_folder)
        os.mkdir(fasta_folder)
        if os.path.exists(self.folders["tmp_tar"]):
            shutil.rmtree(self.folders["tmp_tar"])
        os.mkdir(self.folders["tmp_tar"])
        return new_ref_folder

    def get_target_fasta(self, mut_table, tar_folder, ref_files,
                         out_name, out_folder, log):
        '''Generate ``<out_folder>/fasta_files/<out_name>.fa``.

        mut_table  -- tab-separated mutation table; the first column is the
                      strain name, lines starting with "#" are comments and
                      blank lines are ignored.
        tar_folder -- not used here; kept for interface compatibility.
        ref_files  -- paths of the reference fasta files.
        out_name   -- name of the merged fasta file (without ".fa"); with a
                      single strain it also replaces the sequence header.
        out_folder -- folder that receives ``fasta_files``.
        log        -- object with ``write()`` that receives progress notes.

        When the table lists no strain, an error is printed, the
        temporary folders are still removed and no fasta file is written.
        '''
        new_ref_folder = self.gen_folder(out_folder, ref_files)
        log.write("Running seq_editor.py for updating sequence.\n")
        self.seq_editer.modify_seq(self.folders["tmp_ref"], mut_table,
                                   self.folders["tmp_tar"], out_name)
        print("Updating the reference sequences")
        fasta_folder = os.path.join(out_folder, "fasta_files")
        strain_num = self._copy_updated_fastas(mut_table, out_name,
                                               fasta_folder)
        # The merged file is assembled in the working directory first.
        out_seq = out_name + ".fa"
        if os.path.exists(out_seq):
            os.remove(out_seq)
        generated = strain_num > 0
        if generated:
            self._merge_fastas(fasta_folder, out_seq, out_name,
                               strain_num == 1)
            shutil.move(out_seq, os.path.join(fasta_folder, out_seq))
        else:
            print("Error: No strain is listed in {0}".format(mut_table))
        shutil.rmtree(self.folders["tmp_tar"])
        shutil.rmtree(self.folders["tmp_ref"])
        if "tmp_reference" in os.listdir(out_folder):
            shutil.rmtree(new_ref_folder)
        if generated:
            log.write("\t" + os.path.join(out_folder, "fasta_files",
                                          out_seq) +
                      " is generated.\n")
            print("Please use the new fasta files to remapping again.")

    def _copy_updated_fastas(self, mut_table, out_name, fasta_folder):
        '''Copy the edited fasta of every strain block into fasta_folder.

        A strain block is a run of consecutive table rows that share the
        same first column.  Returns the number of blocks found.
        '''
        strain_num = 0
        pre_strain = None
        with open(mut_table, "r") as mh:
            for row in csv.reader(mh, delimiter='\t'):
                # csv yields an empty row for a blank line.
                if (not row) or row[0].startswith("#"):
                    continue
                strain = row[0]
                if strain != pre_strain:
                    strain_num += 1
                    self._copy_strain_fasta(strain, out_name, fasta_folder)
                pre_strain = strain
        return strain_num

    def _copy_strain_fasta(self, strain, out_name, fasta_folder):
        '''Copy ``<out_name>_<strain>.fa`` from the temporary target folder.'''
        tmp_tar_name = "_".join([out_name, strain]) + ".fa"
        # As before, the output file is created even when no edited
        # sequence exists for the strain.
        with open(os.path.join(fasta_folder, tmp_tar_name), "w") as out:
            if tmp_tar_name in os.listdir(self.folders["tmp_tar"]):
                with open(os.path.join(self.folders["tmp_tar"],
                                       tmp_tar_name)) as f_h:
                    for line in f_h:
                        out.write(line)
            else:
                print("Error: No updated information of {0}.fa".format(
                    strain))

    def _merge_fastas(self, fasta_folder, out_seq, out_name, rename_header):
        '''Merge the per-strain fasta files into out_seq and delete them.

        With ``rename_header`` every header line becomes ``>out_name``;
        otherwise the files are appended unchanged.
        '''
        # Sorted, so the record order does not depend on the order in
        # which the file system happens to list the folder.
        seqs = [os.path.join(fasta_folder, seq)
                for seq in sorted(os.listdir(fasta_folder))
                if seq.endswith(".fa")]
        if rename_header:
            with open(out_seq, "w") as o_s:
                for seq in seqs:
                    with open(seq) as t_h:
                        for line in t_h:
                            if line.startswith(">"):
                                o_s.write(">" + out_name + "\n")
                            else:
                                o_s.write(line)
                    os.remove(seq)
        else:
            # Appended in Python rather than with "cat ... >>" in a shell,
            # so paths with spaces or shell metacharacters are handled.
            with open(out_seq, "wb") as o_s:
                for seq in seqs:
                    with open(seq, "rb") as t_h:
                        shutil.copyfileobj(t_h, o_s)
                    os.remove(seq)
示例14: TSSpredator
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import parser_fasta [as 别名]
#.........这里部分代码省略.........
print("Comparing TSS and Processing site...")
if args_tss.program.lower() == "tss":
for tss in os.listdir(out_folder):
if tss.endswith("_TSS.gff"):
ref = self.helper.get_correct_file(
args_tss.references, "_processing.gff",
tss.replace("_TSS.gff", ""), None, None)
filter_tss_pro(os.path.join(out_folder, tss),
ref, args_tss.overlap_feature,
args_tss.cluster)
elif args_tss.program.lower() == "processing_site":
for tss in os.listdir(out_folder):
if tss.endswith("_processing.gff"):
ref = self.helper.get_correct_file(
args_tss.references, "_TSS.gff",
tss.replace("_processing.gff", ""), None, None)
filter_tss_pro(os.path.join(out_folder, tss),
ref, args_tss.overlap_feature,
args_tss.cluster)
def _low_expression(self, args_tss, gff_folder):
    '''Filter the low expressed TSSs / processing sites of every gff file.

    The wiggle files are merged first (``self._merge_wigs``).  Then, for
    every file in ``gff_folder`` that matches the selected program
    (``*_TSS.gff`` for "tss", ``*_processing.gff`` for "processing"),
    ``filter_low_expression`` is called with the merged wiggle files in
    ``tmp/``, the returned cutoff is written to
    ``<self.stat_outfolder>/<prefix>/stat_<prefix>_low_expression_cutoff.csv``
    and the gff file is replaced by ``tmp/without_low_expression.gff``
    (presumably written by ``filter_low_expression`` -- confirm there).
    '''
    self._merge_wigs(args_tss.wig_folder, "wig", args_tss.libs)
    program = args_tss.program.lower()
    for gff in os.listdir(gff_folder):
        prefix = None
        if (program == "tss") and gff.endswith("_TSS.gff"):
            prefix = gff.replace("_TSS.gff", "")
        elif (program == "processing") and gff.endswith("_processing.gff"):
            prefix = gff.replace("_processing.gff", "")
        if not prefix:
            continue
        gff_file = os.path.join(gff_folder, gff)
        stat_file = os.path.join(
            self.stat_outfolder, prefix,
            "_".join(["stat", prefix, "low_expression_cutoff.csv"]))
        # The context manager closes every table, also when the filter
        # raises, and nothing is closed when no file matched at all.
        with open(stat_file, "w") as out:
            out.write("\t".join(["strain", "cutoff_coverage"]) + "\n")
            cutoff = filter_low_expression(
                gff_file, args_tss,
                "tmp/merge_forward.wig", "tmp/merge_reverse.wig",
                "tmp/without_low_expression.gff")
            out.write("\t".join([prefix, str(cutoff)]) + "\n")
        os.remove(gff_file)
        shutil.move("tmp/without_low_expression.gff", gff_file)
def run_tsspredator(self, args_tss):
# Entry point of the TSS / processing-site workflow.  In order it: checks
# and parses the inputs, calls self._start_to_run once per genome prefix,
# calls self._convert_gff, and then runs the optional follow-up steps
# (orphan check, low-expression filter, manual merge, overlap handling,
# statistics, validation, comparison) before self._remove_files.
# NOTE(review): indentation was lost in this listing, so which statements
# belong to which `for`/`if` cannot be read off here -- confirm the
# nesting against the upstream source before editing.
input_folder = os.path.join(args_tss.out_folder, "configs")
# Check every *.gff annotation file before anything is parsed.
for gff in os.listdir(args_tss.gffs):
if gff.endswith(".gff"):
self.helper.check_uni_attributes(os.path.join(
args_tss.gffs, gff))
self.helper.check_make_folder(self.gff_outfolder)
self.multiparser.parser_fasta(args_tss.fastas)
self.multiparser.parser_gff(args_tss.gffs, None)
self.multiparser.parser_wig(args_tss.wig_folder)
# _set_gen_config returns the prefixes that are iterated below.
prefixs = self._set_gen_config(args_tss, input_folder)
for prefix in prefixs:
out_path = os.path.join(
self.master, "_".join(["MasterTable", prefix]))
config_file = os.path.join(
input_folder, "_".join(["config", prefix]) + ".ini")
self._start_to_run(args_tss.tsspredator_path, config_file,
out_path, prefix)
# Move the statistics table, when one exists, into the statistics folder.
if os.path.exists(os.path.join(out_path, "TSSstatistics.tsv")):
shutil.move(os.path.join(out_path, "TSSstatistics.tsv"),
os.path.join(
self.stat_outfolder, "TSSstatistics.tsv"))
# "processing_site" is rewritten to "processing" on the argument object.
if args_tss.program.lower() == "processing_site":
args_tss.program = "processing"
self._convert_gff(prefixs, args_tss)
if args_tss.check_orphan:
print("checking the orphan TSS...")
self._check_orphan(prefixs,
os.path.join(args_tss.wig_folder, "tmp"),
args_tss)
self.multiparser.combine_gff(args_tss.gffs, self.gff_outfolder,
None, args_tss.program)
# Collect the output names with the "_<program>.gff" suffix removed and
# make one statistics folder for each of them.
datas = []
for gff in os.listdir(self.gff_outfolder):
if gff.endswith(".gff"):
gff_folder = gff.replace("".join(["_", args_tss.program,
".gff"]), "")
self.helper.check_make_folder(
os.path.join(self.stat_outfolder, gff_folder))
datas.append(gff_folder)
if args_tss.remove_low_expression is not None:
self._low_expression(args_tss, self.gff_outfolder)
if args_tss.manual is not None:
self.multiparser.combine_wig(args_tss.gffs, self.wig_path,
None, args_tss.libs)
self._merge_manual(datas, args_tss)
self._deal_with_overlap(self.gff_outfolder, args_tss)
if args_tss.stat:
self._stat_tss(datas, args_tss.program)
if args_tss.validate:
self._validate(datas, args_tss)
if args_tss.ta_files is not None:
self._compare_ta(datas, args_tss)
self._remove_files(args_tss)
示例15: SNPCalling
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import parser_fasta [as 别名]
#.........这里部分代码省略.........
elif fasta.endswith(".fasta"):
prefix = fasta[:-6]
detect = True
return (detect, prefix)
def _run_bam(self, samtools_path, sub_command, bam_file):
    '''Run ``samtools merge`` or ``samtools sort`` on the working BAM file.

    sub_command "merge": ``bam_file`` is the space-separated list of input
    BAM files, merged into ``self.bams["whole"]``.
    sub_command "sort": ``self.bams["whole"]`` is sorted into ``bam_file``.

    Raises ValueError for any other sub_command (previously this ended in
    an UnboundLocalError because no command had been built).
    '''
    if sub_command == "merge":
        arguments = [self.bams["whole"], bam_file]
    elif sub_command == "sort":
        arguments = ["-o", bam_file, self.bams["whole"]]
    else:
        raise ValueError(
            "Unsupported samtools sub-command: {0}".format(sub_command))
    os.system(" ".join([samtools_path, sub_command] + arguments))
def _merge_bams(self, args_snp):
    '''Prepare the sorted BAM file and return the number of input BAMs.

    BAM files are gathered from ``args_snp.normal_bams`` and
    ``args_snp.frag_bams`` through ``self._import_bam``.  A single BAM
    file is copied to ``self.bams["whole"]``; several are merged into it.
    The result is then sorted into ``self.bams["sort"]``.

    The process exits when no BAM folder is assigned, or when the assigned
    folders provide no BAM file (previously an IndexError).
    '''
    if (args_snp.frag_bams is None) and (args_snp.normal_bams is None):
        print("Error: There is no BAMs folders!!")
        sys.exit()
    bams = []
    num_bam = 0
    for folder in (args_snp.normal_bams, args_snp.frag_bams):
        if folder is not None:
            num_bam += self._import_bam(folder, bams)
    if num_bam <= 1:
        if not bams:
            print("Error: There is no BAM file in the assigned folders!!")
            sys.exit()
        shutil.copyfile(bams[0], self.bams["whole"])
    else:
        print("Merge BAM files now ...")
        self._run_bam(args_snp.samtools_path, "merge", " ".join(bams))
    # Sorting is the same step for the copied and the merged file.
    print("Sort BAM file now ...")
    self._run_bam(args_snp.samtools_path, "sort", self.bams["sort"])
    return num_bam
def _modify_header(self, fastas):
    '''Call ``self.seq_editer.modify_header`` on each fasta file in a folder.

    A file is selected when its name ends with "fasta", "fa" or "fna".
    '''
    suffixes = ("fasta", "fa", "fna")
    for name in os.listdir(fastas):
        if not name.endswith(suffixes):
            continue
        self.seq_editer.modify_header(os.path.join(fastas, name))
def _get_header(self, samtools_path):
    '''Dump the header of ``self.bams["sort"]`` into ``self.header``.

    Runs ``<samtools_path> view -H`` through the shell with its output
    redirected to the header file.
    '''
    view_parts = (samtools_path, "view", "-H", self.bams["sort"])
    redirected = (" ".join(view_parts), self.header)
    os.system(">".join(redirected))
def _get_genome_name(self, samtools_path):
    '''Return the sequence names listed in the header of the sorted BAM.

    The header is first written to ``self.header`` by
    ``self._get_header``; the value of the ``SN:`` field of every ``@SQ``
    line is then collected in file order.
    '''
    self._get_header(samtools_path)
    seq_names = []
    with open(self.header, "r") as fh:
        for row in csv.reader(fh, delimiter="\t"):
            # csv yields an empty row for a blank line.
            if (not row) or (row[0] != "@SQ"):
                continue
            # The SN field is looked up by its tag instead of by position.
            for field in row[1:]:
                if field.startswith("SN:"):
                    # Split once only: the name itself may contain ":".
                    seq_names.append(field.split(":", 1)[1])
                    break
    return seq_names
def run_snp_calling(self, args_snp):
    '''Run the SNP calling workflow for every genome found in the BAM header.

    ``args_snp.program`` must contain at least one of "1", "2" or "3";
    otherwise an error is printed and the process exits.  This is checked
    before any file is touched, so an invalid option no longer leaves
    merged BAM files and temporary folders behind.

    Afterwards the fasta folder is handed to ``self.multiparser``, the
    fasta headers are modified, the BAM files are merged and sorted, and
    ``self._run_program`` is called for every file of ``self.fasta_path``
    whose name (up to the first ".f") appears in the BAM header and that
    ``self._detect_fasta`` accepts.  Finally the temporary files are
    removed.
    '''
    if not any(kind in args_snp.program for kind in ("1", "2", "3")):
        print("Error:Please assign a correct BAQ type: "
              "'1' means 'with_BAQ', '2' means 'without_BAQ' or "
              "'3' means 'extend_BAQ'.")
        sys.exit()
    self.multiparser.parser_fasta(args_snp.fastas)
    self._modify_header(args_snp.fastas)
    bam_number = self._merge_bams(args_snp)
    seq_names = self._get_genome_name(args_snp.samtools_path)
    for fasta in os.listdir(self.fasta_path):
        if fasta.split(".f")[0] not in seq_names:
            continue
        detect, prefix = self._detect_fasta(fasta)
        if not detect:
            continue
        print("Computing {0} now ...".format(fasta))
        table_path = os.path.join(self.outputs["table"], prefix)
        raw_path = os.path.join(self.outputs["raw"], prefix)
        self.helper.check_make_folder(table_path)
        self.helper.check_make_folder(raw_path)
        file_prefixs = {"raw_prefix": os.path.join(raw_path, prefix),
                        "table_prefix": os.path.join(table_path, prefix)}
        fasta_file = os.path.join(self.fasta_path, fasta)
        self._run_program(fasta_file, file_prefixs, prefix,
                          bam_number, table_path, args_snp)
        # NOTE(review): the flattened listing does not show whether this
        # file was removed per genome or once at the end.  It is removed
        # per genome here because it is presumably written by
        # _run_program -- confirm against the upstream source.
        os.remove(self.outputs["tmp"])
    self.helper.remove_tmp(args_snp.fastas)
    os.remove(self.bams["whole"])
    os.remove(self.bams["sort"])
    os.remove(self.header)