本文整理汇总了Python中annogesiclib.helper.Helper.check_make_folder方法的典型用法代码示例。如果您正苦于以下问题:Python Helper.check_make_folder方法的具体用法?Python Helper.check_make_folder怎么用?Python Helper.check_make_folder使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类annogesiclib.helper.Helper的用法示例。
在下文中一共展示了Helper.check_make_folder方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: Screen
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_make_folder [as 别名]
class Screen(object):
    '''Generate the IGV batch scripts used for taking screenshots.

    The strain name is the FASTA file name without its last extension.
    A folder of that name, containing a ``forward`` and a ``reverse``
    sub-folder, is created inside the output folder.
    '''

    def __init__(self, args_sc, out_folder):
        '''Create the per-strain output folders.

        Args:
            args_sc: argument container; ``fasta`` is read and
                ``output_folder`` is overwritten with ``out_folder``.
            out_folder: folder that receives the per-strain folder.
        '''
        self.helper = Helper()
        args_sc.output_folder = out_folder
        filename = args_sc.fasta.split("/")[-1]
        self.strain = ".".join(filename.split(".")[0:-1])
        strain_folder = os.path.join(args_sc.output_folder, self.strain)
        self.helper.check_make_folder(strain_folder)
        self.forward_file = os.path.join(strain_folder, "forward")
        self.reverse_file = os.path.join(strain_folder, "reverse")
        os.mkdir(self.forward_file)
        os.mkdir(self.reverse_file)

    def _import_libs(self, texs, strand, lib_dict):
        '''Store the tex libraries and their matching notex libraries.

        Args:
            texs: list of split library descriptions; index 0 is the
                file name, index 1 is ``"tex"`` or ``"notex"``, and
                indexes 2 and 3 are used to pair a notex library with a
                tex library.
            strand: ``"+"`` fills the ``ft``/``fn`` lists, anything else
                fills ``rt``/``rn``.
            lib_dict: dictionary of lists that is updated in place.
        '''
        if strand == "+":
            tex = "ft"
            notex = "fn"
        else:
            tex = "rt"
            notex = "rn"
        for flib in texs:
            if (flib[1] == "tex"):
                lib_dict[tex].append(flib[0])
                for nlib in texs:
                    if (nlib[1] == "notex") and \
                       (flib[2] == nlib[2]) and \
                       (flib[3] == nlib[3]):
                        lib_dict[notex].append(nlib[0])

    def _split_libs(self, libs, log):
        '''Split every ``:``-separated library description into fields.

        Logs and prints an error and exits as soon as the first field
        (the file name) of a library does not end with ``.wig``.

        Returns:
            list of field lists, in input order.
        '''
        parsed = []
        for lib in libs:
            lib_datas = lib.split(":")
            if not lib_datas[0].endswith(".wig"):
                log.write("Wiggle files should end with .wig.\n")
                print("Error: Wiggle files should end with .wig!")
                sys.exit()
            parsed.append(lib_datas)
        return parsed

    def screenshot(self, args_sc, log):
        '''Collect the wiggle libraries and write the IGV batch scripts.

        Args:
            args_sc: argument container; ``tlibs`` and ``flibs`` are
                lists of library descriptions or None.
            log: writable file-like object receiving progress messages.

        Exits (via ``sys.exit``) when no library is assigned at all or
        when a library file name does not end with ``.wig``.
        '''
        # Validate first: without any library there is nothing to plot,
        # so no batch script must be generated.
        if (args_sc.tlibs is None) and (args_sc.flibs is None):
            log.write("No wig files can be found.\n")
            print("Error: There is no wig file assigned!")
            sys.exit()
        lib_dict = {"ft": [], "fn": [], "rt": [], "rn": [], "ff": [], "rf": []}
        if args_sc.tlibs is not None:
            f_texs = []
            r_texs = []
            for lib_datas in self._split_libs(args_sc.tlibs, log):
                # The last field is the strand.
                if lib_datas[-1] == "+":
                    f_texs.append(lib_datas)
                else:
                    r_texs.append(lib_datas)
            f_texs = sorted(f_texs, key=lambda x: (x[1], x[2], x[3]))
            r_texs = sorted(r_texs, key=lambda x: (x[1], x[2], x[3]))
            self._import_libs(f_texs, "+", lib_dict)
            self._import_libs(r_texs, "-", lib_dict)
        if args_sc.flibs is not None:
            for lib_datas in self._split_libs(args_sc.flibs, log):
                if lib_datas[-1] == "+":
                    lib_dict["ff"].append(lib_datas[0])
                else:
                    lib_dict["rf"].append(lib_datas[0])
        log.write("Running gen_screenshots.py to generate IGV batch script.\n")
        gen_screenshot(args_sc, lib_dict, self.forward_file + ".txt",
                       self.reverse_file + ".txt", self.strain)
        log.write("\t" + self.forward_file + ".txt is generated.\n")
        log.write("\t" + self.reverse_file + ".txt is generated.\n")
示例2: PPINetwork
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_make_folder [as 别名]
#.........这里部分代码省略.........
for query in args_ppi.querys:
datas = query.split(":")
strain = datas[0]
start = datas[1]
end = datas[2]
strand = datas[3]
if (len(row) > 1) and (row[0] != "Location") and (
name == strain) and (
start == row[0].split("..")[0]) and (
end == row[0].split("..")[1]) and (
strand == row[1]):
genes.append({"strain": name, "locus_tag": row[5]})
fh.close()
return genes
def _setup_nospecific(self, paths, strain_id, files):
self._make_subfolder(
paths["all"], self.without_strain, strain_id["ptt"])
self._make_subfolder(
paths["best"], self.without_strain, strain_id["ptt"])
self._make_subfolder(
paths["fig"], self.without_strain, strain_id["ptt"])
filename_nostrain = "_".join([strain_id["file"].replace(".ptt", ""),
self.without_strain + ".csv"])
files["all_nospecific"] = open(os.path.join(paths["all"],
filename_nostrain), "w")
files["best_nospecific"] = open(os.path.join(paths["best"],
filename_nostrain), "w")
def _setup_folder_and_read_file(self, strain_id, pre_file,
files, paths, args_ppi):
if strain_id["file"].endswith(".ptt"):
if strain_id["file"] != pre_file:
self.helper.check_make_folder(
"_".join([self.tmp_id, strain_id["file"]]))
paths["all"] = os.path.join(
self.all_result, strain_id["file"][:-4])
paths["best"] = os.path.join(
self.best_result, strain_id["file"][:-4])
paths["fig"] = os.path.join(
self.fig, strain_id["file"][:-4])
self.helper.check_make_folder(
os.path.join(self.all_result, strain_id["file"][:-4]))
self.helper.check_make_folder(
os.path.join(self.best_result, strain_id["file"][:-4]))
self.helper.check_make_folder(
os.path.join(self.fig, strain_id["file"][:-4]))
self._make_subfolder(
paths["all"], self.with_strain, strain_id["ptt"])
self._make_subfolder(
paths["best"], self.with_strain, strain_id["ptt"])
self._make_subfolder(
paths["fig"], self.with_strain, strain_id["ptt"])
filename_strain = "_".join(
[strain_id["file"].replace(".ptt", ""),
self.with_strain + ".csv"])
files["all_specific"] = open(os.path.join(
paths["all"], filename_strain), "w")
files["best_specific"] = open(os.path.join(
paths["best"], filename_strain), "w")
if args_ppi.no_specific:
self._setup_nospecific(paths, strain_id, files)
files["id_list"] = "_".join([self.tmp_id, strain_id["file"]])
files["id_log"] = open(os.path.join(files["id_list"],
self.tmp_files["log"]), "w")
files["action_log"] = open(os.path.join(args_ppi.out_folder,
示例3: MEME
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_make_folder [as 别名]
#.........这里部分代码省略.........
with open(os.path.join(input_path, fasta), "r") as f_h:
for line in f_h:
line = line.strip()
if line.startswith(">"):
datas = line.split("_")
strain = "_".join(datas[2:])
if pre_strain != strain:
num_strain += 1
filename = fasta.split("allgenome")
if out is not None:
out.close()
out = open(os.path.join(
input_path, "".join([
filename[0], strain,
filename[-1]])), "a")
pre_strain = strain
out.write(line + "\n")
else:
out.write(line + "\n")
if num_strain <= 1:
os.remove(os.path.join(input_path,
"".join([filename[0], strain, filename[-1]])))
out.close()
def _run_program(self, prefixs, args_pro, log, input_fastas):
# Run the promoter search for every prefix in ``prefixs``.
#
# prefixs      -- names of the sub-folders of self.out_fasta to process.
# args_pro     -- argument container; ``output_folder``, ``program`` and
#                 ``widths`` are read here.
# log          -- writable file-like object receiving progress messages.
# input_fastas -- collection of FASTA type names that should be searched.
#
# NOTE(review): the indentation of this copy was lost, so the nesting
# described in the comments below is inferred from the statements
# themselves -- confirm against the upstream file.
log.write("Using MEME or GLAM2 to predict promoter.\n")
log.write("Please make sure their versions are at least 4.11.1.\n")
log.write("If you are running for parallel, please make sure you "
"have install MPICH and its version is at least 3.2.\n")
for prefix in prefixs:
input_path = os.path.join(self.out_fasta, prefix)
out_path = os.path.join(args_pro.output_folder, prefix)
# Create the result folder(s) of the selected program(s); any other
# value of ``program`` creates no folder.
if args_pro.program.lower() == "both":
self.helper.check_make_folder(os.path.join(out_path, "MEME"))
self.helper.check_make_folder(os.path.join(out_path, "GLAM2"))
elif args_pro.program.lower() == "meme":
self.helper.check_make_folder(os.path.join(out_path, "MEME"))
elif args_pro.program.lower() == "glam2":
self.helper.check_make_folder(os.path.join(out_path, "GLAM2"))
for fasta in os.listdir(input_path):
# NOTE: str.replace removes every ".fa" occurrence, not only a suffix.
filename = fasta.replace(".fa", "")
names = filename.split("_")
# A file is selected when its last "_"-separated token is requested,
# or when its last two tokens form "all_types" / "without_orphan"
# and that name is requested.
if (names[-1] in input_fastas) or (
("_".join(names[-2:]) == "all_types") and (
"all_types" in input_fastas)) or (
("_".join(names[-2:]) == "without_orphan") and (
"without_orphan" in input_fastas)):
for width in args_pro.widths:
print("Computing promoters of {0} - {1}".format(
fasta, width))
log.write("Computing promoters of {0} - length {1}.\n".format(
fasta, width))
# A width containing "-" is handled by _run_small_motif, any other
# width by _run_normal_motif.
if "-" in width:
self._run_small_motif(input_path, out_path, filename,
fasta, width, args_pro, log)
else:
self._run_normal_motif(input_path, out_path, filename,
fasta, width, args_pro, log)
log.write("Promoter search for {0} is done.\n".format(prefix))
# NOTE(review): this message reads ``out_path``, which is only assigned
# inside the prefix loop; if the statement sits after the loop it fails
# for an empty ``prefixs`` -- confirm the intended nesting.
log.write("All the output files from MEME or GLAM2 are generated "
"and stored in {0}.\n".format(out_path))
def _combine_file(self, prefixs, args_pro):
'''combine all TSS file in the input folder to generate the
global TSS for detecting the global promoter'''
if args_pro.source:
示例4: RATT
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_make_folder [as 别名]
class RATT(object):
def __init__(self, args_ratt):
    '''Create the helper objects and derive the working paths.

    Args:
        args_ratt: argument container; ``ref_embls``, ``output_path``,
            ``tar_fastas``, ``ref_fastas`` and ``gff_outfolder`` are read.
    '''
    self.multiparser = Multiparser()
    self.converter = Converter()
    self.format_fixer = FormatFixer()
    self.helper = Helper()
    embl_root = args_ratt.ref_embls
    self.gbk = os.path.join(embl_root, "gbk_tmp")
    self.gbk_tmp = os.path.join(self.gbk, "tmp")
    self.embl = os.path.join(embl_root, "embls")
    self.ratt_log = os.path.join(args_ratt.output_path, "ratt_log.txt")
    # Temporary locations: one per FASTA folder, the rest below the
    # GFF output folder.
    self.tmp_files = {
        "tar": os.path.join(args_ratt.tar_fastas, "tmp"),
        "ref": os.path.join(args_ratt.ref_fastas, "tmp"),
    }
    for key, name in (("out_gff", "tmp"), ("gff", "tmp.gff"),
                      ("ptt", "tmp.ptt"), ("rnt", "tmp.rnt")):
        self.tmp_files[key] = os.path.join(args_ratt.gff_outfolder, name)
def _convert_to_pttrnt(self, gffs, files):
for gff in files:
if gff.endswith(".gff"):
gff = os.path.join(gffs, gff)
filename = gff.split("/")
prefix = filename[-1][:-4]
rnt = gff[:-3] + "rnt"
ptt = gff[:-3] + "ptt"
fasta = self.helper.get_correct_file(self.tmp_files["tar"],
".fa", prefix, None, None)
if fasta:
self.converter.convert_gff2rntptt(gff, fasta, ptt, rnt,
None, None)
def _remove_files(self, args_ratt, out_gbk):
self.helper.remove_all_content(args_ratt.gff_outfolder, ".gff", "file")
self.helper.remove_all_content(args_ratt.gff_outfolder, ".ptt", "file")
self.helper.remove_all_content(args_ratt.gff_outfolder, ".rnt", "file")
self.helper.move_all_content(self.tmp_files["out_gff"],
args_ratt.gff_outfolder, None)
shutil.rmtree(self.tmp_files["out_gff"])
shutil.rmtree(self.tmp_files["tar"])
shutil.rmtree(self.tmp_files["ref"])
shutil.rmtree(self.embl)
self.helper.remove_all_content(args_ratt.tar_fastas, "_folder", "dir")
self.helper.remove_all_content(args_ratt.ref_fastas, "_folder", "dir")
if out_gbk:
shutil.rmtree(out_gbk)
def _convert_to_gff(self, ratt_result, args_ratt, files):
name = ratt_result.split(".")
filename = ".".join(name[1:-2]) + ".gff"
output_file = os.path.join(args_ratt.output_path, filename)
self.converter.convert_embl2gff(
os.path.join(args_ratt.output_path, ratt_result), output_file)
self.format_fixer.fix_ratt(output_file, ".".join(name[1:-2]),
"tmp_gff")
shutil.move("tmp_gff", output_file)
shutil.copy(output_file, os.path.join(args_ratt.gff_outfolder,
filename))
files.append(filename)
def _parser_embl_gbk(self, files):
# Split the given GenBank files into one file per record below self.gbk.
#
# files -- paths of the files to read.
# Returns self.gbk, the folder that receives the per-record files.
#
# NOTE(review): the indentation of this copy was lost; the nesting
# described below is inferred from the statements -- confirm upstream.
self.helper.check_make_folder(self.gbk)
for file_ in files:
# ``close`` records whether a record terminator ("//") was seen in
# this file.
close = False
with open(file_, "r") as f_h:
for line in f_h:
if (line.startswith("LOCUS")):
# A LOCUS line starts a new record: (re)open the temporary file and
# take the first non-empty field after "LOCUS" as the file name.
out = open(self.gbk_tmp, "w")
datas = line.split(" ")
for data in datas:
if (len(data) != 0) and (data != "LOCUS"):
filename = ".".join([data, "gbk"])
break
elif (line.startswith("VERSION")):
datas = line.split(" ")
for data in datas:
if (len(data) != 0) and (data != "VERSION"):
new_filename = ".".join([data, "gbk"])
break
# NOTE(review): str.find returns -1 (truthy) when ``filename`` is not
# contained and 0 (falsy) when ``new_filename`` starts with it, so the
# VERSION name is taken in every case except a match at position 0 --
# confirm this is the intent.
if new_filename.find(filename):
filename = new_filename
# NOTE(review): ``out`` is only assigned on a LOCUS line; a file whose
# first line is not LOCUS appears to reach this test with ``out``
# unbound -- verify against the expected input.
if out:
out.write(line)
if line.startswith("//"):
# End of record: close the temporary file and move it to its final name.
out.close()
close = True
shutil.move(self.gbk_tmp,
os.path.join(self.gbk, filename))
if not close:
out.close()
return self.gbk
def _convert_embl(self, ref_embls):
detect_gbk = False
gbks = []
out_gbk = None
#.........这里部分代码省略.........
示例5: sRNADetection
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_make_folder [as 别名]
#.........这里部分代码省略.........
print("merging data of intergenic and UTR_derived sRNA...")
merge_srna_gff(gffs, args_srna.in_cds,
args_srna.cutoff_overlap, gff_file)
merge_srna_table(gffs["merge"], csvs, os.path.join(args_srna.wig_path,
"_".join([prefix, "forward.wig"])),
os.path.join(args_srna.wig_path,
"_".join([prefix, "reverse.wig"])),
tss, args_srna)
def _run_RNAfold(self, seq_file, vienna_path, sec_file):
os.system(" ".join(["cat", seq_file, "|",
os.path.join(vienna_path, "RNAfold"),
"-p", ">", sec_file]))
def _get_seq_sec(self, fasta_path, out_folder, prefix, sec_path,
dot_path, vienna_path):
# Extract the sequences of one genome and run RNAfold on them inside a
# temporary folder.
#
# Returns a dictionary with the keys "sec", "dot", "main" and "tmp":
# sec_path and dot_path joined to the original working directory, the
# original working directory itself, and the temporary folder.
#
# NOTE: this method changes the current working directory to the
# temporary folder and does not change it back.
# NOTE(review): the indentation of this copy was lost; the nesting
# described below is inferred from the statements -- confirm upstream.
detect = False
# Look for "<prefix>.fa" in fasta_path; ``fasta`` keeps the matching name.
for fasta in os.listdir(fasta_path):
if fasta.endswith(".fa") and (
fasta.replace(".fa", "") == prefix):
detect = True
break
if detect:
detect = False
seq_file = os.path.join(out_folder, "_".join(["sRNA_seq", prefix]))
sec_file = os.path.join(out_folder, "_".join(["sRNA_2d", prefix]))
self.helper.get_seq("_".join([self.prefixs["basic"], prefix]),
os.path.join(fasta_path, fasta), seq_file)
else:
print("Error:There is not fasta file of {0}".format(prefix))
print("please check your imported information")
sys.exit()
tmp_path = os.path.join(out_folder, "tmp_srna")
self.helper.check_make_folder(tmp_path)
# Remember the starting directory before entering the temporary folder
# so that the paths below can be joined to it.
main_path = os.getcwd()
os.chdir(tmp_path)
sec_file = os.path.join(main_path, sec_file)
seq_file = os.path.join(main_path, seq_file)
tmp_sec_path = os.path.join(main_path, sec_path)
tmp_dot_path = os.path.join(main_path, dot_path)
self._run_RNAfold(seq_file, vienna_path, sec_file)
extract_energy(os.path.join(main_path,
"_".join([self.prefixs["basic"], prefix])),
sec_file, os.path.join(main_path,
"_".join([self.prefixs["energy"], prefix])))
# Replace "|" by "_" in the name of every entry of the temporary folder
# (the current working directory at this point).
for ps in os.listdir(os.getcwd()):
new_ps = ps.replace("|", "_")
shutil.move(ps, new_ps)
return {"sec": tmp_sec_path, "dot": tmp_dot_path, "main": main_path,
"tmp": os.path.join(main_path, tmp_path)}
def _run_replot(self, vienna_util, tmp_paths, file_, dot_file, rel_file):
os.system(" ".join([os.path.join(vienna_util, "relplot.pl"),
os.path.join(tmp_paths["tmp"], file_),
os.path.join(tmp_paths["tmp"], dot_file),
">", os.path.join(tmp_paths["tmp"], rel_file)]))
def _convert_pdf(self, ps2pdf14_path, tmp_paths, file_, pdf_file):
call([ps2pdf14_path, os.path.join(tmp_paths["tmp"], file_), pdf_file])
def _replot_sec_to_pdf(self, vienna_util, tmp_paths,
ps2pdf14_path, prefix):
for file_ in os.listdir(os.getcwd()):
if file_.endswith("ss.ps"):
dot_file = file_.replace("ss.ps", "dp.ps")
rel_file = file_.replace("ss.ps", "rss.ps")
示例6: GoTermFinding
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_make_folder [as 别名]
class GoTermFinding(object):
'''Retrieving the GO term'''
def __init__(self, args_go):
    '''Create the helper objects and derive the working paths.

    Args:
        args_go: argument container; ``out_folder``, ``gffs`` and
            ``trans`` (may be None) are read.
    '''
    self.multiparser = Multiparser()
    self.helper = Helper()
    out_folder = args_go.out_folder
    self.out_all = os.path.join(out_folder, "all_CDSs")
    self.out_express = os.path.join(out_folder, "expressed_CDSs")
    self.result_all_path = os.path.join(self.out_all, "GO_term_results")
    self.result_express_path = os.path.join(
        self.out_express, "GO_term_results")
    self.gff_path = os.path.join(args_go.gffs, "tmp")
    # No transcript folder means there is no temporary transcript path.
    self.tran_path = (None if args_go.trans is None
                      else os.path.join(args_go.trans, "tmp"))
    self.stat_all_path = os.path.join(self.out_all, "statistics")
    self.stat_express_path = os.path.join(self.out_express, "statistics")
    self.all_strain = "all_genomes_uniprot.csv"
def _retrieve_go(self, uniprot, out_path, type_, log):
    '''Call ``retrieve_uniprot`` for every file of the GFF folder.

    Args:
        uniprot: forwarded unchanged to ``retrieve_uniprot``.
        out_path: folder that receives one sub-folder per file prefix.
        type_: forwarded unchanged to ``retrieve_uniprot``.
        log: writable file-like object receiving progress messages.
    '''
    prefixs = []
    log.write("Running gene_ontology.py to retrieve GO terms.\n")
    for gff in os.listdir(self.gff_path):
        prefix = gff.replace(".gff", "")
        prefixs.append(prefix)
        strain_folder = os.path.join(out_path, prefix)
        self.helper.check_make_folder(strain_folder)
        out_file = os.path.join(out_path, prefix,
                                "_".join([prefix, "uniprot.csv"]))
        print("Extracting GO terms of {0} from UniProt".format(prefix))
        # The transcript file is only passed on when a transcript
        # folder was configured.
        tran_file = None
        if self.tran_path is not None:
            tran_file = os.path.join(
                self.tran_path, "_".join([prefix, "transcript.gff"]))
        gff_file = os.path.join(self.gff_path, gff)
        retrieve_uniprot(uniprot, gff_file, out_file, tran_file, type_)
        log.write("\t" + out_file + " is generated.\n")
def _remove_header(self, out_all):
out = open(out_all + "_tmp", "w")
fh = open(out_all, "r")
out.write("\t".join(["Genome", "Strand", "Start", "End",
"Protein_id", "Go_term"]) + "\n")
for row in csv.reader(fh, delimiter='\t'):
if row[0] != "Genome":
out.write("\t".join(row) + "\n")
out.close()
fh.close()
shutil.move(out_all + "_tmp", out_all)
def _merge_files(self, gffs, out_path, out_folder, log):
'''merge the files according to the input genome folder'''
# gffs       -- folder whose entries ending with "gff_folder" are processed.
# out_path   -- folder holding one sub-folder per GFF prefix with its
#               "<prefix>_uniprot.csv"; it is emptied and refilled here.
# out_folder -- folder in which the merged per-genome folders are built
#               before they are moved into out_path.
# log        -- writable file-like object receiving progress messages.
#
# NOTE(review): the indentation of this copy was lost; the nesting
# described below is inferred from the statements -- confirm upstream.
folders = []
log.write("Merging the output files based on the input genome "
"information.\n")
for folder in os.listdir(gffs):
if folder.endswith("gff_folder"):
folder_prefix = folder.replace(".gff_folder", "")
folder_path = os.path.join(out_folder, folder_prefix)
self.helper.check_make_folder(folder_path)
folders.append(folder_path)
# Collect the prefixes of all GFF files of this genome folder.
filenames = []
for gff in os.listdir(os.path.join(gffs, folder)):
if gff.endswith(".gff"):
filenames.append(gff.replace(".gff", ""))
out_all = os.path.join(folder_path, self.all_strain)
if len(filenames) > 1:
# Several files: start from a fresh combined table, append each
# per-file table, normalise the header after every append and keep a
# copy of the per-file table.
if self.all_strain in os.listdir(folder_path):
os.remove(out_all)
for filename in filenames:
csv_file = "_".join([filename, "uniprot.csv"])
self.helper.merge_file(os.path.join(out_path,
filename, csv_file), out_all)
self._remove_header(out_all)
shutil.copy(os.path.join(out_path, filename, csv_file),
folder_path)
else:
# NOTE(review): this branch reads filenames[0]; a genome folder without
# any ".gff" file would presumably raise IndexError here -- verify.
shutil.copyfile(os.path.join(out_path, filenames[0],
"_".join([filenames[0], "uniprot.csv"])),
out_all)
# Empty out_path, then move the merged folders into it and log their files.
self.helper.remove_all_content(out_path, None, "dir")
self.helper.remove_all_content(out_path, None, "file")
for folder in folders:
folder_prefix = folder.split("/")[-1]
shutil.move(folder, os.path.join(out_path, folder_prefix))
for file_ in os.listdir(os.path.join(out_path, folder_prefix)):
log.write("\t" + os.path.join(out_path, folder_prefix, file_) +
" is generated.\n")
def _stat(self, out_path, stat_path, go, goslim, out_folder, log):
log.write("Running gene_ontology.py to Retrieve GOslim terms and "
"do statistics.\n")
log.write("The following files are generated:\n")
for folder in os.listdir(out_path):
strain_stat_path = os.path.join(stat_path, folder)
self.helper.check_make_folder(strain_stat_path)
fig_path = os.path.join(strain_stat_path, "figs")
if "fig" not in os.listdir(strain_stat_path):
#.........这里部分代码省略.........
示例7: Ribos
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_make_folder [as 别名]
class Ribos(object):
def __init__(self, args_ribo):
    '''Create the helper objects and derive the working paths.

    Args:
        args_ribo: argument container; ``gffs``, ``tsss``, ``trans``,
            ``fastas``, ``out_folder`` and ``database`` are read.
    '''
    self.multiparser = Multiparser()
    self.helper = Helper()
    self.gff_parser = Gff3Parser()
    # Temporary input folders, one per kind of input data.
    self.gff_path = os.path.join(args_ribo.gffs, "tmp")
    self.tss_path = os.path.join(args_ribo.tsss, "tmp")
    self.tran_path = os.path.join(args_ribo.trans, "tmp")
    self.fasta_path = os.path.join(args_ribo.fastas, "tmp")
    # Output folders.
    self.stat_folder = os.path.join(args_ribo.out_folder, "statistics")
    self.gff_outfolder = os.path.join(args_ribo.out_folder, "gffs")
    self.table_folder = os.path.join(args_ribo.out_folder, "tables")
    self.scan_folder = os.path.join(args_ribo.out_folder, "scan_Rfam")
    self.ribos_rfam = os.path.join(args_ribo.database,
                                   "Rfam_riboswitch.cm")
    self.tmp_files = {}
    for key in ("fasta", "scan", "table"):
        self.tmp_files[key] = os.path.join(
            args_ribo.out_folder, "tmp_" + key)
    self.suffixs = {
        "csv": "riboswitch.csv",
        "txt": "riboswitch_prescan.txt",
        "re_txt": "riboswitch_scan.txt",
        "re_csv": "riboswitch_scan.csv",
    }
def _run_infernal(self, args_ribo, seq, type_, prefix):
scan_file = os.path.join(self.tmp_files["scan"],
"_".join([prefix, self.suffixs[type_]]))
scan = open(scan_file, "w")
call([os.path.join(args_ribo.infernal_path, "cmscan"), "--incE",
str(args_ribo.e_value), "--acc", self.ribos_rfam, seq],
stdout=scan)
scan.close()
return scan_file
def _scan_extract_rfam(self, prefixs, args_ribo):
    '''Extract candidates and scan them twice for every GFF file.

    Args:
        prefixs: list that receives the prefix of every processed file.
        args_ribo: argument container forwarded to the helper functions;
            ``output_all`` is read here.

    Returns:
        The same ``prefixs`` list.
    '''
    for gff in os.listdir(self.gff_path):
        if not gff.endswith(".gff"):
            continue
        prefix = gff.replace(".gff", "")
        first_seq = os.path.join(self.tmp_files["fasta"], prefix + ".fa")
        prefixs.append(prefix)
        print("extracting seq of riboswitch candidates of {0}".format(
            prefix))
        extract_potential_rbs(
            os.path.join(self.fasta_path, prefix + ".fa"),
            os.path.join(self.gff_path, gff),
            os.path.join(self.tss_path, prefix + "_TSS.gff"),
            os.path.join(self.tran_path, prefix + "_transcript.gff"),
            first_seq, args_ribo)
        # First pass: pre-scan the extracted candidates.
        print("pre-scanning of {0}".format(prefix))
        first_scan_file = self._run_infernal(
            args_ribo, first_seq, "txt", prefix)
        sec_seq = os.path.join(self.tmp_files["fasta"],
                               "_".join([prefix, "regenerate.fa"]))
        first_table = os.path.join(
            self.tmp_files["table"],
            "_".join([prefix, self.suffixs["csv"]]))
        regenerate_seq(first_scan_file, first_seq, first_table, sec_seq)
        # Second pass: scan the regenerated sequences; the resulting
        # table replaces the first one.
        print("scanning of {0}".format(prefix))
        sec_scan_file = self._run_infernal(
            args_ribo, sec_seq, "re_txt", prefix)
        sec_table = os.path.join(
            self.tmp_files["table"],
            "_".join([prefix, self.suffixs["re_csv"]]))
        reextract_rbs(sec_scan_file, first_table, sec_table)
        shutil.move(sec_table, first_table)
        modify_table(first_table, args_ribo.output_all)
    return prefixs
def _merge_results(self, args_ribo):
for gff in os.listdir(args_ribo.gffs):
if gff.endswith(".gff"):
prefix = gff.replace(".gff", "")
print("Merge results of {0}".format(prefix))
pre_strain = ""
self.helper.check_make_folder(os.path.join(
self.scan_folder, prefix))
fh = open(os.path.join(args_ribo.gffs, gff))
for entry in self.gff_parser.entries(fh):
if entry.seq_id != pre_strain:
if len(pre_strain) == 0:
shutil.copyfile(os.path.join(
self.tmp_files["table"],
"_".join([entry.seq_id, self.suffixs["csv"]])),
os.path.join(
self.table_folder,
"_".join([prefix, self.suffixs["csv"]])))
else:
self.helper.merge_file(os.path.join(
self.tmp_files["table"],
"_".join([entry.seq_id, self.suffixs["csv"]])),
os.path.join(
self.table_folder,
"_".join([prefix, self.suffixs["csv"]])))
shutil.copy(os.path.join(
self.tmp_files["scan"],
#.........这里部分代码省略.........
示例8: Screen
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_make_folder [as 别名]
class Screen(object):
    """Generate the IGV batch scripts used for taking screenshots.

    A ``screenshots`` folder is created inside the output folder. It
    holds one folder per strain (the FASTA file name without its last
    extension) with a ``forward`` and a ``reverse`` sub-folder.
    """

    def __init__(self, args_sc):
        """Create the output folders.

        Args:
            args_sc: argument container; ``fasta`` is read and
                ``output_folder`` is replaced by its ``screenshots``
                sub-folder.

        Exits (via ``sys.exit``) when the ``screenshots`` folder already
        exists.
        """
        self.multiparser = Multiparser()
        self.helper = Helper()
        out_folder = os.path.join(args_sc.output_folder, "screenshots")
        if os.path.exists(out_folder):
            print("Error: The {0} already exist!!!".format(
                  out_folder))
            sys.exit()
        else:
            os.mkdir(out_folder)
        args_sc.output_folder = out_folder
        filename = args_sc.fasta.split("/")[-1]
        self.strain = ".".join(filename.split(".")[0:-1])
        strain_folder = os.path.join(args_sc.output_folder, self.strain)
        self.helper.check_make_folder(strain_folder)
        self.forward_file = os.path.join(strain_folder, "forward")
        self.reverse_file = os.path.join(strain_folder, "reverse")
        os.mkdir(self.forward_file)
        os.mkdir(self.reverse_file)

    def _import_libs(self, texs, strand, wig_path, lib_dict):
        """Store the tex libraries and their matching notex libraries.

        Args:
            texs: list of split library descriptions; index 0 is the
                file name, index 1 is ``"tex"`` or ``"notex"``, and
                indexes 2 and 3 are used to pair a notex library with a
                tex library.
            strand: ``"+"`` fills the ``ft``/``fn`` lists, anything else
                fills ``rt``/``rn``.
            wig_path: folder that is joined in front of every file name.
            lib_dict: dictionary of lists that is updated in place.
        """
        if strand == "+":
            tex = "ft"
            notex = "fn"
        else:
            tex = "rt"
            notex = "rn"
        for flib in texs:
            if (flib[1] == "tex"):
                lib_dict[tex].append(os.path.join(wig_path, flib[0]))
                for nlib in texs:
                    if (nlib[1] == "notex") and \
                       (flib[2] == nlib[2]) and \
                       (flib[3] == nlib[3]):
                        lib_dict[notex].append(
                            os.path.join(wig_path, nlib[0]))

    def _split_libs(self, libs):
        """Split every ``:``-separated library description into fields.

        Prints an error and exits as soon as the first field (the file
        name) of a library does not end with ``.wig``.

        Returns:
            list of field lists, in input order.
        """
        parsed = []
        for lib in libs:
            lib_datas = lib.split(":")
            if not lib_datas[0].endswith(".wig"):
                print("Error:Exist a not proper wig files!!")
                sys.exit()
            parsed.append(lib_datas)
        return parsed

    def screenshot(self, args_sc):
        """Collect the wiggle libraries and write the IGV batch scripts.

        Args:
            args_sc: argument container; ``tlibs`` and ``flibs`` are
                lists of library descriptions or None, ``tex_wigs`` and
                ``frag_wigs`` are the folders of the wiggle files.

        Exits (via ``sys.exit``) when no library is assigned at all or
        when a library file name does not end with ``.wig``.
        """
        # Validate first: without any library there is nothing to plot,
        # so no batch script must be generated.
        if (args_sc.tlibs is None) and (args_sc.flibs is None):
            print("Error: There are no wig file assigned!!!")
            sys.exit()
        lib_dict = {"ft": [], "fn": [], "rt": [], "rn": [], "ff": [], "rf": []}
        if args_sc.tlibs is not None:
            f_texs = []
            r_texs = []
            for lib_datas in self._split_libs(args_sc.tlibs):
                # The last field is the strand.
                if lib_datas[-1] == "+":
                    f_texs.append(lib_datas)
                else:
                    r_texs.append(lib_datas)
            f_texs = sorted(f_texs, key=lambda x: (x[1], x[2], x[3]))
            r_texs = sorted(r_texs, key=lambda x: (x[1], x[2], x[3]))
            self._import_libs(f_texs, "+", args_sc.tex_wigs, lib_dict)
            self._import_libs(r_texs, "-", args_sc.tex_wigs, lib_dict)
        if args_sc.flibs is not None:
            for lib_datas in self._split_libs(args_sc.flibs):
                wig_file = os.path.join(args_sc.frag_wigs, lib_datas[0])
                if lib_datas[-1] == "+":
                    lib_dict["ff"].append(wig_file)
                else:
                    lib_dict["rf"].append(wig_file)
        gen_screenshot(args_sc, lib_dict, self.forward_file + ".txt",
                       self.reverse_file + ".txt", self.strain)
示例9: TargetFasta
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_make_folder [as 别名]
class TargetFasta(object):
    '''Build updated FASTA files from reference FASTAs and a mutation
    table.'''

    def __init__(self, tar_folder, ref_folder):
        '''Create the helper objects.

        Args:
            tar_folder: target folder; its ``tmp`` sub-folder is used
                as temporary output folder.
            ref_folder: accepted for interface compatibility; not used.
        '''
        self.multiparser = Multiparser()
        self.seq_editer = SeqEditer()
        self.helper = Helper()
        self.folders = {"tmp_tar": os.path.join(tar_folder, "tmp")}

    def gen_folder(self, out_folder, ref_files):
        '''Copy the reference files and (re)create the working folders.

        Args:
            out_folder: folder that receives ``tmp_reference`` and
                ``fasta_files``.
            ref_files: reference FASTA files copied to ``tmp_reference``.

        Returns:
            Path of the ``tmp_reference`` folder.
        '''
        new_ref_folder = os.path.join(out_folder, "tmp_reference")
        self.helper.check_make_folder(new_ref_folder)
        for file_ in ref_files:
            shutil.copy(file_, new_ref_folder)
        self.folders["tmp_ref"] = os.path.join(new_ref_folder, "tmp")
        self.multiparser.parser_fasta(new_ref_folder)
        # Both folders always start empty, whether or not they existed.
        for folder in (os.path.join(out_folder, "fasta_files"),
                       self.folders["tmp_tar"]):
            if os.path.exists(folder):
                shutil.rmtree(folder)
            os.mkdir(folder)
        return new_ref_folder

    def _collect_updated_fastas(self, mut_table, out_name, fasta_folder):
        '''Copy the updated FASTA of every strain of the mutation table.

        Rows that are empty or start with ``#`` are ignored. A new
        strain starts whenever the first column differs from that of
        the previous data row.

        Returns:
            Number of strain blocks found in the table.
        '''
        strain_num = 0
        pre_strain = None
        with open(mut_table, "r") as mh:
            for row in csv.reader(mh, delimiter='\t'):
                if (not row) or row[0].startswith("#"):
                    continue
                if pre_strain != row[0]:
                    strain_num = strain_num + 1
                    tmp_tar_name = "_".join([out_name, row[0]]) + ".fa"
                    updated = os.path.join(self.folders["tmp_tar"],
                                           tmp_tar_name)
                    with open(os.path.join(fasta_folder, tmp_tar_name),
                              "w") as out:
                        if os.path.isfile(updated):
                            with open(updated) as f_h:
                                shutil.copyfileobj(f_h, out)
                        else:
                            print("Error: No updated information of "
                                  "{0}.fa".format(row[0]))
                pre_strain = row[0]
        return strain_num

    def _merge_fastas(self, fasta_folder, out_seq, out_name, rename):
        '''Concatenate all ``.fa`` files of ``fasta_folder`` into
        ``out_seq`` and delete them.

        When ``rename`` is true every header line is replaced by
        ``>`` + ``out_name``.
        '''
        with open(out_seq, "w") as o_s:
            for seq in os.listdir(fasta_folder):
                if not seq.endswith(".fa"):
                    continue
                seq_path = os.path.join(fasta_folder, seq)
                with open(seq_path) as t_h:
                    for line in t_h:
                        if rename and line.startswith(">"):
                            o_s.write(">" + out_name + "\n")
                        else:
                            o_s.write(line)
                os.remove(seq_path)

    def get_target_fasta(self, mut_table, tar_folder, ref_files,
                         out_name, out_folder, log):
        '''Generate ``<out_name>.fa`` inside ``<out_folder>/fasta_files``.

        Args:
            mut_table: tab-separated mutation table; the first column
                names the strain.
            tar_folder: accepted for interface compatibility; not used.
            ref_files: reference FASTA files.
            out_name: name of the output sequence and file prefix.
            out_folder: folder that receives ``fasta_files``.
            log: writable file-like object receiving progress messages.
        '''
        new_ref_folder = self.gen_folder(out_folder, ref_files)
        log.write("Running seq_editor.py for updating sequence.\n")
        self.seq_editer.modify_seq(self.folders["tmp_ref"], mut_table,
                                   self.folders["tmp_tar"], out_name)
        print("Updating the reference sequences")
        fasta_folder = os.path.join(out_folder, "fasta_files")
        strain_num = self._collect_updated_fastas(
            mut_table, out_name, fasta_folder)
        # The merged file is assembled in the current working directory
        # and moved afterwards, so it is never listed as its own input.
        out_seq = out_name + ".fa"
        # With a single strain the header is renamed to the output name.
        self._merge_fastas(fasta_folder, out_seq, out_name,
                           strain_num == 1)
        shutil.move(out_seq, os.path.join(fasta_folder, out_seq))
        shutil.rmtree(self.folders["tmp_tar"])
        shutil.rmtree(self.folders["tmp_ref"])
        if "tmp_reference" in os.listdir(out_folder):
            shutil.rmtree(new_ref_folder)
        log.write("\t" + os.path.join(out_folder, "fasta_files", out_seq) +
                  " is generated.\n")
        print("Please use the new fasta files to remapping again.")
示例10: Controller
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_make_folder [as 别名]
class Controller(object):
"""Manage the actions of the subcommands.
The Controller take care of providing the argumentes like path
names and the parallel processing of tasks.
"""
def __init__(self, args):
    """Store the parsed arguments and create the helper objects.

    Args:
        args: parsed command line arguments; ``project_path`` is used
            to build the ``Paths`` object.
    """
    self._args = args
    project_path = args.project_path
    self._paths = Paths(project_path)
    self.args_container = ArgsContainer()
    self.helper = Helper()
def check_folder(self, folders):
    """Check that every required folder exists and is not empty.

    Args:
        folders: iterable of folder paths; None counts as an error.

    Prints an error message and exits (via ``sys.exit``) on the first
    entry that is None, is not an existing directory, or is empty.
    """
    for folder in folders:
        if folder is None:
            print("Error: There is wrong path of folder assigned, "
                  "please check it!!")
            sys.exit()
        # isdir (not exists) so that a regular file is reported as a
        # wrong folder instead of failing inside os.listdir.
        if not os.path.isdir(folder):
            print("Error: There is wrong folder, please check it!!")
            sys.exit()
        if not os.listdir(folder):
            print("Error: There is empty folder, "
                  "please check it!!")
            sys.exit()
def check_parameter(self, paras, names):
    """Check that no parameter is None.

    Args:
        paras: parameter values.
        names: names used in the error message, in the same order.

    Prints an error message and exits (via ``sys.exit``) on the first
    value that is None.
    """
    for index, para in enumerate(paras):
        if para is not None:
            continue
        print("Error: {0} is wrong, "
              "please check it!!".format(names[index]))
        sys.exit()
def check_no_require_folder(self, folders):
    """Check the optional folders that were actually assigned.

    Args:
        folders: iterable of folder paths; None entries are skipped.

    Prints an error message and exits (via ``sys.exit``) on the first
    assigned entry that is not an existing directory or is empty.
    """
    for folder in folders:
        if folder is None:
            continue
        # isdir (not exists) so that a regular file is reported as a
        # wrong folder instead of failing inside os.listdir.
        if not os.path.isdir(folder):
            print("Error: There is wrong folder, "
                  "please check it!!")
            sys.exit()
        if not os.listdir(folder):
            print("Error: There is empty folder, "
                  "please check it!!")
            sys.exit()
def check_file(self, files, names, require):
    """Check that the given paths point to existing files.

    Args:
        files: file paths; an entry may be None.
        names: names used in the error messages, in the same order.
        require: when true a None entry is an error, otherwise None
            entries are skipped.

    Prints an error message and exits (via ``sys.exit``) on the first
    entry that fails the check.
    """
    for index, path in enumerate(files):
        if path is None:
            # A missing value only matters for required files.
            if require:
                print("Error: {0} is wrong, "
                      "please check it!!".format(names[index]))
                sys.exit()
            continue
        if not os.path.isfile(path):
            print("Error: There is wrong path of {0}, "
                  "please check it!!".format(names[index]))
            sys.exit()
def create_project(self, version):
    """Create the folder structure and the version file of a project.

    Args:
        version: value handed to ``project_creator.create_version_file``.
    """
    project_creator.create_root_folder(self._args.project_path)
    for section in ("root", "get_target_fasta"):
        subfolders = self._paths.required_folders(section)
        project_creator.create_subfolders(subfolders)
    version_path = self._paths.version_path
    project_creator.create_version_file(version_path, version)
    message = "Created folder \"%s\" and required subfolders.\n" % (
        self._args.project_path)
    sys.stdout.write(message)
def get_input(self):
"""Download required files from website."""
print("Running get input files...")
if self._args.FTP_path is None:
print("Error: Please assign the path for downloading the data!!")
sys.exit()
if self._args.for_target:
annotation_folder = self._paths.tar_annotation_folder
fasta_folder = self._paths.tar_fasta_folder
else:
annotation_folder = self._paths.ref_annotation_folder
fasta_folder = self._paths.ref_fasta_folder
self.helper.check_make_folder(annotation_folder)
self.helper.check_make_folder(fasta_folder)
if self._args.ref_gff is True:
get_file(self._args.FTP_path, annotation_folder,
"gff", self._args.for_target)
get_file(self._args.FTP_path, annotation_folder,
"_genomic.gff.gz", self._args.for_target)
#.........这里部分代码省略.........
示例11: TSSpredator
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_make_folder [as 别名]
#.........这里部分代码省略.........
if (lib_datas[1] == "tex") and (lib_datas[4] == "+"):
lib_dict["fp"].append(self._assign_dict(lib_datas))
elif (lib_datas[1] == "tex") and (lib_datas[4] == "-"):
lib_dict["fm"].append(self._assign_dict(lib_datas))
elif (lib_datas[1] == "notex") and (lib_datas[4] == "+"):
lib_dict["np"].append(self._assign_dict(lib_datas))
elif (lib_datas[1] == "notex") and (lib_datas[4] == "-"):
lib_dict["nm"].append(self._assign_dict(lib_datas))
for num_id in range(1, lib_num+1):
out.write("annotation_{0} = {1}\n".format(num_id, gff))
if program.lower() == "tss":
self._print_lib(lib_num, lib_dict["fm"], out,
wig_folder, "fivePrimeMinus")
self._print_lib(lib_num, lib_dict["fp"], out,
wig_folder, "fivePrimePlus")
elif program.lower() == "processing_site":
self._print_lib(lib_num, lib_dict["nm"], out,
wig_folder, "fivePrimeMinus")
self._print_lib(lib_num, lib_dict["np"], out,
wig_folder, "fivePrimePlus")
else:
print("Error: Wrong program name!!!")
sys.exit()
for num_id in range(1, lib_num+1):
out.write("genome_{0} = {1}\n".format(num_id, fasta))
for num_id in range(1, lib_num+1):
list_num_id.append(str(num_id))
return lib_num, num_id, rep_set, lib_dict, list_num_id
# Write the TSSpredator configuration file for one strain.
# (Only the first part of the method is visible in this listing.)
#
# project_strain_name -- names the "MasterTable_<name>" folder and is
#                        forwarded to self._import_lib()
# args_tss            -- option object; the attributes read below supply
#                        the tunable parameter values
# gff, fasta          -- forwarded to self._import_lib()
# wig_folder          -- forwarded to self._import_lib() / self._print_lib()
# config_file         -- path of the configuration file, opened in "w" mode
def _gen_config(self, project_strain_name, args_tss, gff,
wig_folder, fasta, config_file):
master_folder = "MasterTable_" + project_strain_name
out_path = os.path.join(self.master, master_folder)
self.helper.check_make_folder(out_path)
# NOTE(review): no close() for this handle is visible in the listing --
# confirm that the full method closes it.
out = open(config_file, "w")
# Settings written with fixed values.
out.write("TSSinClusterSelectionMethod = HIGHEST\n")
out.write("allowedCompareShift = 1\n")
out.write("allowedRepCompareShift = 1\n")
# _import_lib() receives the open handle `out` together with the library
# description and returns the library bookkeeping used below.
lib_num, num_id, rep_set, lib_dict, list_num_id = \
self._import_lib(args_tss.libs, wig_folder, project_strain_name,
out, gff, args_tss.program, fasta)
out.write("idList = ")
out.write(",".join(list_num_id) + "\n")
out.write("maxASutrLength = 100\n")
out.write("maxGapLengthInGene = 500\n")
# Settings taken from the user-supplied options.
out.write("maxNormalTo5primeFactor = {0}\n".format(
args_tss.processing_factor))
# The cluster option is written incremented by one.
out.write("maxTSSinClusterDistance = {0}\n".format(
args_tss.cluster + 1))
out.write("maxUTRlength = {0}\n".format(args_tss.utr_length))
out.write("min5primeToNormalFactor = {0}\n".format(
args_tss.enrichment_factor))
out.write("minCliffFactor = {0}\n".format(args_tss.factor))
out.write("minCliffFactorDiscount = {0}\n".format(
args_tss.factor_reduction))
out.write("minCliffHeight = {0}\n".format(args_tss.height))
out.write("minCliffHeightDiscount = {0}\n".format(
args_tss.height_reduction))
out.write("minNormalHeight = {0}\n".format(args_tss.base_height))
out.write("minNumRepMatches = {0}\n".format(args_tss.repmatch))
out.write("minPlateauLength = 0\n")
out.write("mode = cond\n")
out.write("normPercentile = 0.9\n")
# For the "tss" program the "nm" library list is passed to _print_lib()
# under the "normalMinus" label.
if args_tss.program.lower() == "tss":
self._print_lib(lib_num, lib_dict["nm"], out,
wig_folder, "normalMinus")
示例12: GoTermFinding
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_make_folder [as 别名]
class GoTermFinding(object):
def __init__(self, args_go):
    """Create the helper objects and derive the working paths.

    All paths are computed from ``args_go.out_folder``, ``args_go.gffs``
    and the optional ``args_go.trans``.
    """
    self.multiparser = Multiparser()
    self.helper = Helper()

    out_root = args_go.out_folder
    result_dir = "Go_term_results"
    stat_dir = "statistics"

    self.out_all = os.path.join(out_root, "all_CDS")
    self.out_express = os.path.join(out_root, "expressed_CDS")
    self.result_all_path = os.path.join(self.out_all, result_dir)
    self.result_express_path = os.path.join(self.out_express, result_dir)
    self.gff_path = os.path.join(args_go.gffs, "tmp")
    # Transcript input is optional; keep None when it was not supplied.
    self.tran_path = (None if args_go.trans is None
                      else os.path.join(args_go.trans, "tmp"))
    self.stat_all_path = os.path.join(self.out_all, stat_dir)
    self.stat_express_path = os.path.join(self.out_express, stat_dir)
    self.all_strain = "all_strains_uniprot.csv"
def _retrieve_go(self, uniprot, out_path, type_):
    """Run ``retrieve_uniprot`` once for every entry of ``self.gff_path``.

    For each entry a folder ``<out_path>/<prefix>`` is (re)created through
    the helper and ``<prefix>_uniprot.csv`` inside it is used as the output
    file, where ``<prefix>`` is the entry name without ".gff".

    uniprot  -- passed through to ``retrieve_uniprot``
    out_path -- parent folder of the per-prefix output folders
    type_    -- passed through to ``retrieve_uniprot``
    """
    for gff in os.listdir(self.gff_path):
        prefix = gff.replace(".gff", "")
        strain_folder = os.path.join(out_path, prefix)
        self.helper.check_make_folder(strain_folder)
        out_file = os.path.join(strain_folder,
                                "_".join([prefix, "uniprot.csv"]))
        print("extracting Go terms of {0} from UniProt...".format(prefix))
        # The transcript file is only handed over when transcripts exist.
        tran_file = None
        if self.tran_path is not None:
            tran_file = os.path.join(self.tran_path,
                                     "_".join([prefix, "transcript.gff"]))
        retrieve_uniprot(uniprot, os.path.join(self.gff_path, gff),
                         out_file, tran_file, type_)
def _merge_files(self, gffs, out_path, out_folder):
    """Collect the per-file UniProt tables into one folder per genome.

    Every entry of ``gffs`` whose name ends with "gff_folder" gets a folder
    below ``out_folder``. The ``<name>_uniprot.csv`` tables belonging to the
    ".gff" files of that entry are merged into ``self.all_strain`` there
    (a single table is simply copied). Afterwards ``out_path`` is emptied
    through the helper and the new folders are moved into it.
    """
    folders = []
    for folder in os.listdir(gffs):
        if not folder.endswith("gff_folder"):
            continue
        folder_prefix = folder.replace(".gff_folder", "")
        folder_path = os.path.join(out_folder, folder_prefix)
        self.helper.check_make_folder(folder_path)
        folders.append(folder_path)
        filenames = [gff.replace(".gff", "")
                     for gff in os.listdir(os.path.join(gffs, folder))
                     if gff.endswith(".gff")]
        out_all = os.path.join(folder_path, self.all_strain)
        if len(filenames) > 1:
            # Start from a clean merged table.
            if self.all_strain in os.listdir(folder_path):
                os.remove(out_all)
            for filename in filenames:
                csv_path = os.path.join(
                    out_path, filename,
                    "_".join([filename, "uniprot.csv"]))
                self.helper.merge_file(csv_path, out_all)
                shutil.copy(csv_path, folder_path)
        else:
            # NOTE(review): an entry without any ".gff" file still raises
            # IndexError here, exactly as before.
            shutil.copyfile(
                os.path.join(out_path, filenames[0],
                             "_".join([filenames[0], "uniprot.csv"])),
                out_all)
    self.helper.remove_all_content(out_path, None, "dir")
    self.helper.remove_all_content(out_path, None, "file")
    for folder in folders:
        # basename() copes with the platform's separator; splitting on "/"
        # returned the whole path where the separator is not "/".
        shutil.move(folder,
                    os.path.join(out_path, os.path.basename(folder)))
def _stat(self, out_path, stat_path, go, goslim, out_folder):
    """Compute the statistics and collect the figures per result folder.

    For every entry of ``out_path`` a folder of the same name is (re)created
    below ``stat_path`` together with a "figs" sub-folder, ``map2goslim`` is
    run on that entry's ``self.all_strain`` table, and the PNG files found
    in ``out_folder`` are moved into "figs" through the helper.
    """
    figure_suffixes = ("_three_roots.png", "_molecular_function.png",
                       "_cellular_component.png",
                       "_biological_process.png")
    for folder in os.listdir(out_path):
        strain_stat_path = os.path.join(stat_path, folder)
        self.helper.check_make_folder(strain_stat_path)
        fig_path = os.path.join(strain_stat_path, "figs")
        # The old guard looked for an entry called "fig" while creating
        # "figs", so it never protected the mkdir; exist_ok does.
        os.makedirs(fig_path, exist_ok=True)
        print("Computing statistics of {0}".format(folder))
        map2goslim(goslim, go,
                   os.path.join(out_path, folder, self.all_strain),
                   os.path.join(strain_stat_path,
                                "_".join(["stat", folder + ".csv"])),
                   out_folder)
        for suffix in figure_suffixes:
            self.helper.move_all_content(out_folder, fig_path, [suffix])
# Entry point of the GO-term workflow.
# (Only the first steps of the method are visible in this listing.)
#
# args_go -- option object; reads args_go.gffs and args_go.trans here
def run_go_term(self, args_go):
# Hand every ".gff" file of the annotation folder to
# helper.check_uni_attributes().
for gff in os.listdir(args_go.gffs):
if gff.endswith(".gff"):
self.helper.check_uni_attributes(os.path.join(
args_go.gffs, gff))
self.multiparser.parser_gff(args_go.gffs, None)
# Transcript files are optional and only parsed when a folder was given.
if args_go.trans is not None:
self.multiparser.parser_gff(args_go.trans, "transcript")
print("Computing all CDS...")
#.........这里部分代码省略.........
示例13: TranscriptAssembly
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_make_folder [as 别名]
#.........这里部分代码省略.........
self.gff_outfolder, "_".join([strain, self.tex]))
final_gff = os.path.join(
self.gff_outfolder,
"_".join([strain, self.endfix_tran]))
shutil.move(tex_gff, final_gff)
def _post_modify(self, tas, args_tran):
    """Refine each transcript file against its genome annotation.

    For every prefix in ``tas`` the transcript file is processed by
    ``fill_gap`` in "overlap" and "uni" mode, the two results are merged
    and sorted, ``longer_ta`` is applied with ``args_tran.length`` and the
    result is stored in ``self.tmps["tran"]``. Finally that folder replaces
    ``self.gff_outfolder``.
    """
    for ta in tas:
        # NOTE(review): when no annotation matches, ``gff`` keeps the last
        # listed name (behaviour left unchanged) -- confirm callers always
        # pass prefixes that have an annotation file.
        for gff in os.listdir(args_tran.gffs):
            if (".gff" in gff) and (gff[:-4] == ta):
                break
        print("Modifying {0} refering to {1}...".format(ta, gff))
        anno_file = os.path.join(args_tran.gffs, gff)
        tran_file = os.path.join(self.tran_path,
                                 "_".join([ta, self.endfix_tran]))
        fill_gap(anno_file, tran_file, "overlap", self.tmps["overlap"])
        fill_gap(anno_file, tran_file, "uni", self.tmps["uni"])
        tmp_merge = os.path.join(self.gff_outfolder, self.tmps["merge"])
        # Drop a stale merge file. The previous test was a substring check
        # against the folder *path*, not a check that the file exists.
        if os.path.exists(tmp_merge):
            os.remove(tmp_merge)
        self.helper.merge_file(self.tmps["overlap"], tmp_merge)
        self.helper.merge_file(self.tmps["uni"], tmp_merge)
        tmp_out = os.path.join(self.gff_outfolder, "_".join(["tmp", ta]))
        self.helper.sort_gff(tmp_merge, tmp_out)
        os.remove(self.tmps["overlap"])
        os.remove(self.tmps["uni"])
        os.remove(tmp_merge)
        final_out = os.path.join(self.gff_outfolder,
                                 "_".join(["final", ta]))
        longer_ta(tmp_out, args_tran.length, final_out)
        shutil.move(final_out,
                    os.path.join(self.tmps["tran"],
                                 "_".join([ta, self.endfix_tran])))
        os.remove(tmp_out)
    # Swap the refined transcripts in for the original output folder.
    shutil.rmtree(self.gff_outfolder)
    shutil.move(self.tmps["tran"], self.gff_outfolder)
def _remove_file(self, args_tran):
    """Clean up through the helper after the run.

    ``remove_wigs`` is called for each supplied wig folder, ``remove_tmp``
    for each supplied input folder, for ``<out_folder>/gffs`` and for
    ``self.gff_outfolder``. Options that are None are skipped.
    """
    for option in ("frag_wigs", "tex_wigs"):
        wig_folder = getattr(args_tran, option)
        if wig_folder is not None:
            self.helper.remove_wigs(wig_folder)
    for option in ("gffs", "compare_cds", "compare_tss", "terms"):
        input_folder = getattr(args_tran, option)
        if input_folder is not None:
            self.helper.remove_tmp(input_folder)
    gff_results = os.path.join(args_tran.out_folder, "gffs")
    self.helper.remove_tmp(gff_results)
    self.helper.remove_tmp(self.gff_outfolder)
def _compare_term_tran(self, args_tran):
    """Compare terminators with transcripts when terminators were given.

    Does nothing if ``args_tran.terms`` is None. Otherwise the terminator
    files are parsed and combined through the multiparser and
    ``compare_term_tran`` is run on ``self.gff_outfolder``.
    """
    if args_tran.terms is None:
        return
    print("comparing between terminators and transcripts...")
    self.multiparser.parser_gff(args_tran.terms, "term")
    term_tmp = os.path.join(args_tran.terms, "tmp")
    self.multiparser.combine_gff(args_tran.gffs, term_tmp, None, "term")
    # The same fuzzy value is passed for both fuzzy arguments.
    fuzzy = args_tran.fuzzy_term
    compare_term_tran(self.gff_outfolder,
                      os.path.join(args_tran.terms, "tmp"),
                      fuzzy, fuzzy,
                      args_tran.out_folder, "transcript")
# Run the transcript workflow: wig processing, optional refinement against
# the genome annotation, comparisons, table generation and clean-up.
#
# args_tran -- option object; at least one of frag_wigs / tex_wigs must be
#              set, otherwise an error is printed and sys.exit() is called
def run_transcript_assembly(self, args_tran):
if (args_tran.frag_wigs is None) and (args_tran.tex_wigs is None):
print("Error: there is no wigs files!!!!\n")
sys.exit()
# NOTE(review): when both wig sets are given, `strains` from the fragment
# run appears to be replaced by the result of the tex/notex run -- confirm
# this is intended.
if args_tran.frag_wigs is not None:
strains = self._for_one_wig("fragment", args_tran)
if args_tran.tex_wigs is not None:
strains = self._for_one_wig("tex_notex", args_tran)
self._for_two_wigs(strains, args_tran)
# Prefixes of the non-empty transcript files found below.
tas = []
if args_tran.gffs is not None:
# Sort every annotation file in place via a temporary file.
for gff in os.listdir(args_tran.gffs):
if gff.endswith(".gff"):
self.helper.sort_gff(os.path.join(args_tran.gffs, gff),
self.tmps["gff"])
shutil.move(self.tmps["gff"],
os.path.join(args_tran.gffs, gff))
# NOTE(review): indentation is lost in this listing, so it cannot be told
# from here how many of the following statements belong to the
# `if args_tran.gffs is not None:` body -- check the original source.
self.multiparser.combine_gff(args_tran.gffs, os.path.join(
args_tran.gffs, "tmp"), None, None)
self.multiparser.parser_gff(self.gff_outfolder, "transcript")
self.multiparser.combine_gff(args_tran.gffs, self.tran_path,
None, "transcript")
self.helper.check_make_folder(self.tmps["tran"])
# Keep only transcript files that are not empty; the prefix is the file
# name with "_" + self.endfix_tran removed.
for ta in os.listdir(self.tran_path):
if ta.endswith(".gff"):
if os.path.getsize(os.path.join(self.tran_path, ta)) != 0:
tas.append(ta.replace("_" + self.endfix_tran, ""))
self._post_modify(tas, args_tran)
self._compare_tss_cds(tas, args_tran)
self._compare_term_tran(args_tran)
gen_table_transcript(self.gff_outfolder, args_tran)
self._remove_file(args_tran)
示例14: SNPCalling
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_make_folder [as 别名]
class SNPCalling(object):
'''detection of SNP'''
def __init__(self, args_snp):
    """Create the helper objects and derive every path used by the run.

    The sub-folder of ``args_snp.out_folder`` that receives the results
    depends on ``args_snp.types``; the figure folder is created through the
    helper right away.
    """
    self.multiparser = Multiparser()
    self.seq_editer = SeqEditer()
    self.helper = Helper()

    file_type = ("compare_related_and_reference_genomes"
                 if args_snp.types == "related_genome"
                 else "mutations_of_reference_genomes")
    out_root = args_snp.out_folder
    type_root = os.path.join(out_root, file_type)

    self.seq_path = os.path.join(type_root, "seqs")
    self.stat_path = os.path.join(type_root, "statistics")
    self.fig_path = os.path.join(self.stat_path, "figs")
    self.helper.check_make_folder(self.fig_path)

    self.outputs = {
        "table": os.path.join(type_root, "SNP_tables"),
        "raw": os.path.join(type_root, "SNP_raw_outputs"),
        "tmp": os.path.join(out_root, "tmp_bcf"),
        "depth": os.path.join(out_root, "tmp_depth"),
    }
    self.bams = {
        "whole": os.path.join(out_root, "whole_reads.bam"),
        "sort": os.path.join(out_root, "whole_reads_sorted.bam"),
        "bams": [],
    }
    self.header = os.path.join(out_root, "header")
    # Folder/file name fragments for the three BAQ modes.
    self.baqs = {"with": "with_BAQ", "without": "without_BAQ",
                 "extend": "extend_BAQ"}
# Run snp_detect() for every BAM description in bam_datas and move the
# resulting ".png" files from table_path to self.fig_path.
#
# fasta            -- passed through to snp_detect()
# out_table_prefix -- joined with the sample name (by "_") for snp_detect()
# type_            -- key into self.baqs ("with" / "without" / "extend")
# prefix           -- used in the folder and file names built below
# bam_datas        -- iterable of dicts; the keys "sample", "bam_number"
#                     and "rep" are read here
# table_path       -- folder the ".png" files are moved away from
# args_snp         -- passed through to snp_detect()
def _transcript_snp(self, fasta, out_table_prefix, type_,
prefix, bam_datas, table_path, args_snp):
seq_path = os.path.join(self.seq_path, self.baqs[type_], prefix)
for bam in bam_datas:
stat_prefix = os.path.join(self.stat_path, "_".join([
"stat", "_".join([prefix, self.baqs[type_], bam["sample"]]),
"SNP"]))
snp_file = os.path.join(self.outputs["raw"], prefix, "_".join(
[prefix, self.baqs[type_], bam["sample"] + ".vcf"]))
# The depth file name is the "depth" path with the sample name appended
# directly (plain string concatenation, no separator).
snp_detect(
fasta, snp_file, self.outputs["depth"] + bam["sample"],
"_".join([out_table_prefix, bam["sample"]]),
os.path.join(seq_path, "_".join([prefix, bam["sample"]])),
bam["bam_number"], stat_prefix, args_snp, bam["rep"])
# NOTE(review): indentation is lost in this listing; whether this call runs
# per BAM entry or once after the loop cannot be told from here.
self.helper.move_all_content(table_path, self.fig_path, [".png"])
def _get_para(self, args_snp):
    """Return the bcftools option string for the selected caller.

    "-vcO" when ``args_snp.caller`` equals "c", otherwise "-vmO".
    """
    return "-vcO" if args_snp.caller == "c" else "-vmO"
def _run_tools(self, fasta_file, type_, args_snp, bam_datas, log):
    """Run ``samtools mpileup`` and ``bcftools call`` for every BAM entry.

    fasta_file -- reference passed to mpileup with "-ugf"
    type_      -- "with", "without" or "extend"; selects no extra flag,
                  "-B" or "-E" for mpileup
    args_snp   -- option object (tool paths, out_folder, rg, chrom, caller)
    bam_datas  -- list of dicts with a "sample" key; each dict receives a
                  "vcf" key holding the output path
    log        -- writable text stream for the executed commands

    The mpileup output is written to ``self.outputs["tmp"]``. bcftools only
    runs when ``args_snp.chrom`` is "1" or "2".
    """
    bcf_para = self._get_para(args_snp)
    for bam in bam_datas:
        bam_file = os.path.join(args_snp.out_folder,
                                bam["sample"] + ".bam")
        if type_ == "with":
            command = [args_snp.samtools_path, "mpileup", "-t", "DP"]
        elif type_ == "without":
            command = [args_snp.samtools_path, "mpileup", "-t", "DP", "-B"]
        elif type_ == "extend":
            command = [args_snp.samtools_path, "mpileup", "-t", "DP", "-E"]
        if args_snp.rg:
            command = command + ["-ugf", fasta_file, bam_file]
        else:
            command = command + ["--ignore-RG", "-ugf", fasta_file, bam_file]
        log.write(" ".join(command) + ">" + self.outputs["tmp"] + "\n")
        # Run without a shell: an argument list plus a file handle for
        # stdout keeps paths with spaces or shell metacharacters intact,
        # and matches how bcftools is invoked below.
        with open(self.outputs["tmp"], "wb") as pileup_out:
            call(command, stdout=pileup_out)
        bam["vcf"] = os.path.join(self.outputs["raw"], "_".join(
            [self.baqs[type_], bam["sample"] + ".vcf"]))
        if args_snp.chrom in ("1", "2"):
            bcf_command = [args_snp.bcftools_path, "call"]
            if args_snp.chrom == "1":
                # Only the "1" setting passes an explicit ploidy.
                bcf_command += ["--ploidy", args_snp.chrom]
            bcf_command += [self.outputs["tmp"], bcf_para, "v",
                            "-o", bam["vcf"]]
            log.write(" ".join(bcf_command) + "\n")
            call(bcf_command)
    log.write("Done!\n")
    log.write("The following files are generated:\n")
    for file_ in os.listdir(self.outputs["raw"]):
        log.write("\t" + os.path.join(self.outputs["raw"], file_) + "\n")
# Collect the sequence names found in the FASTA files of args_snp.fastas.
# (Only the beginning of the method is visible in this listing; the use of
# type_, num_prog and fa_prefixs is not shown here.)
def _parse_vcf_by_fa(self, args_snp, type_, num_prog, log):
seq_names = []
fa_prefixs = []
log.write("Parsing Vcf files by comparing fasta information.\n")
for fa in os.listdir(args_snp.fastas):
# The file named "all.fa" and any ".fai" file are skipped.
if (fa != "all.fa") and (not fa.endswith(".fai")):
with open(os.path.join(args_snp.fastas, fa)) as fh:
for line in fh:
line = line.strip()
# For lines starting with ">", keep the text after the marker.
if line.startswith(">"):
seq_names.append(line[1:])
#.........这里部分代码省略.........
示例15: Terminator
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import check_make_folder [as 别名]
class Terminator(object):
'''detection of terminator'''
def __init__(self, args_term):
    """Create the helper objects, derive all paths and make the folders.

    Input paths come from ``args_term.gffs``, ``args_term.fastas``,
    ``args_term.trans`` and the optional ``args_term.srnas``; result folders
    live below ``args_term.out_folder``; temporary names are anchored at the
    current working directory. ``_make_gff_folder`` is called at the end.
    """
    self.multiparser = Multiparser()
    self.helper = Helper()
    self.converter = Converter()
    self.gff_parser = Gff3Parser()

    self.gff_path = os.path.join(args_term.gffs, "tmp")
    self.fasta_path = os.path.join(args_term.fastas, "tmp")
    self.tran_path = os.path.join(args_term.trans, "tmp")
    self.outfolder = {"term": os.path.join(args_term.out_folder, "gffs"),
                      "csv": os.path.join(args_term.out_folder, "tables")}

    # The GFF and table trees use the same four candidate sub-folders.
    candidate_dirs = (("all", "all_candidates"),
                      ("express", "expressed_candidates"),
                      ("best", "best_candidates"),
                      ("non", "non_expressed_candidates"))
    self.terms = {key: os.path.join(self.outfolder["term"], name)
                  for key, name in candidate_dirs}
    self.csvs = {key: os.path.join(self.outfolder["csv"], name)
                 for key, name in candidate_dirs}

    self.combine_path = os.path.join(self.gff_path, "combine")
    cwd = os.getcwd()
    self.tmps = {"transterm": os.path.join(cwd, "tmp_transterm"),
                 "hp": "transtermhp", "hp_gff": "transtermhp.gff",
                 "hp_path": "tmp_transterm/tmp",
                 "term_table": os.path.join(cwd, "tmp_term_table"),
                 "merge": os.path.join(cwd, "tmp_merge_gff"),
                 "gff": "tmp.gff",
                 "folder": os.path.join(cwd, "tmp")}
    self.suffixs = {"gff": "term.gff", "csv": "term.csv",
                    "allgff": "term_all.gff"}
    # sRNA input is optional.
    self.srna_path = (os.path.join(args_term.srnas, "tmp")
                      if args_term.srnas else None)
    self._make_gff_folder()
def _combine_annotation(self, combine_file, files):
    """Concatenate the data part of several annotation tables.

    For every existing, non-empty file in ``files`` the lines following the
    first line that contains "Location" are appended to ``combine_file``; a
    newline is added when the file's last line lacks one.

    Returns "NO_CDS" as soon as an empty ".ptt" file is met (a warning is
    printed), otherwise "Normal".
    """
    with open(combine_file, 'w') as result:
        for file_ in files:
            if (file_.endswith(".ptt")) and (os.stat(file_).st_size == 0):
                print("Warning: No CDS information, "
                      "TransTermHP can not work!")
                return "NO_CDS"
            if os.path.exists(file_) and (
                    os.stat(file_).st_size != 0):
                check_start = False
                # The context manager closes the input even if a write
                # fails part-way through.
                with open(file_, 'r') as fh:
                    for line in fh:
                        if check_start:
                            result.write(line)
                        if "Location" in line:
                            check_start = True
                # Keep consecutive files separated by a line break.
                if "\n" not in line:
                    result.write("\n")
    return "Normal"
def _make_gff_folder(self):
    """Create the GFF and table folder of every candidate category.

    Categories are handled in the order all, best, express, non; for each
    one the folder from ``self.terms`` is made before the one from
    ``self.csvs``.
    """
    for category in ("all", "best", "express", "non"):
        for folder_map in (self.terms, self.csvs):
            self.helper.check_make_folder(folder_map[category])
# Prepare the conversion of every ".gff" file in gff_path to ".rnt"/".ptt".
# (The listing is cut off in the middle of the method; how file_types and
# prefixs are used afterwards is not visible here.)
#
# gff_path   -- folder scanned for ".gff" files; the output names are built
#               in the same folder
# fasta_path -- folder searched through helper.get_correct_file() for the
#               matching ".fa" file
# sRNAs      -- optional sRNA folder; parsed only when truthy
# log        -- writable text stream for progress and error messages
def _convert_gff2rntptt(self, gff_path, fasta_path, sRNAs, log):
file_types = {}
prefixs = []
for gff in os.listdir(gff_path):
if gff.endswith(".gff"):
# The prefix is the file name without its 4-character ".gff" extension.
filename = gff.split("/")
prefix = filename[-1][:-4]
prefixs.append(prefix)
gff_file = os.path.join(gff_path, gff)
rnt_file = os.path.join(gff_path, gff.replace(".gff", ".rnt"))
ptt_file = os.path.join(gff_path, gff.replace(".gff", ".ptt"))
fasta = self.helper.get_correct_file(
fasta_path, ".fa", prefix, None, None)
# A falsy lookup result is fatal: log, print and exit.
if not fasta:
log.write("{0}.fa can not be found.\n".format(prefix))
print("Error: {0}.fa can not be found!".format(prefix))
sys.exit()
if sRNAs:
self.multiparser.parser_gff(sRNAs, "sRNA")
srna = self.helper.get_correct_file(
self.srna_path, "_sRNA.gff", prefix, None, None)
if (srna) and (fasta):
log.write("Running converter.py to convert {0} and "
"{1} to {2}, {3}, and {4}.\n".format(
#.........这里部分代码省略.........