本文整理汇总了Python中annogesiclib.helper.Helper.move_all_content方法的典型用法代码示例。如果您正苦于以下问题：Python Helper.move_all_content方法的具体用法？Python Helper.move_all_content怎么用？Python Helper.move_all_content使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类annogesiclib.helper.Helper的用法示例。
在下文中一共展示了Helper.move_all_content方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: TargetFasta
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import move_all_content [as 别名]
class TargetFasta(object):
    """Produce target FASTA files from reference sequences and a mutation table.

    Edited sequences are staged in ``<tar_folder>/tmp`` and then either merged
    into user-named files or moved into ``tar_folder`` unchanged.
    """

    def __init__(self, tar_folder, ref_folder):
        self.multiparser = Multiparser()
        self.seq_editer = SeqEditer()
        self.helper = Helper()
        # Scratch sub-folders; both are deleted again at the end of
        # get_target_fasta().
        self.folders = {"tmp_tar": os.path.join(tar_folder, "tmp"),
                        "tmp_ref": os.path.join(ref_folder, "tmp")}

    def get_target_fasta(self, mut_table, tar_folder, ref_folder, output):
        """Create the target FASTA files inside ``tar_folder``.

        Args:
            mut_table: mutation table handed to ``SeqEditer.modify_seq``.
            tar_folder: folder receiving the final ``.fa`` files.
            ref_folder: folder holding the reference FASTA files.
            output: ``None`` to move every staged ``.fa`` file as it is, or
                an iterable of ``"<filename>:<strainA>_and_<strainB>..."``
                entries; each entry merges the listed strains into
                ``<filename>.fa``.
        """
        self.multiparser.parser_fasta(ref_folder)
        if "tmp" in os.listdir(tar_folder):
            shutil.rmtree(self.folders["tmp_tar"])
        os.mkdir(self.folders["tmp_tar"])
        self.seq_editer.modify_seq(self.folders["tmp_ref"], mut_table,
                                   self.folders["tmp_tar"])
        print("transfer to target fasta...")
        if output is not None:
            # The staging folder is not modified while merging, so one
            # directory listing is enough for all membership checks.
            staged = set(os.listdir(self.folders["tmp_tar"]))
            for file_ in output:
                datas = file_.split(":")
                filename = datas[0]
                strains = datas[1].split("_and_")
                self._merge_strains(
                    os.path.join(tar_folder, filename + ".fa"),
                    strains, staged)
        else:
            self.helper.move_all_content(self.folders["tmp_tar"],
                                         tar_folder, [".fa"])
        shutil.rmtree(self.folders["tmp_tar"])
        shutil.rmtree(self.folders["tmp_ref"])
        self.helper.remove_all_content(ref_folder, "_folder", "dir")
        print("please use the new fasta file to remapping again.")
        print("Then copy BAMs and wigs back to input/align_results/BAMs "
              "and input/align_results/wigs")

    def _merge_strains(self, out_path, strains, staged):
        """Concatenate the staged FASTA files of ``strains`` into ``out_path``."""
        first = True
        # The context manager closes the output even if a read fails.
        with open(out_path, "w") as out:
            for strain in strains:
                if strain + ".fa" not in staged:
                    print("Error:no fasta information of {0}.fa".format(
                        strain))
                    continue
                if first:
                    first = False
                else:
                    # Blank-line separator between consecutive strains.
                    out.write("\n")
                with open(os.path.join(self.folders["tmp_tar"],
                                       strain + ".fa")) as f_h:
                    for line in f_h:
                        out.write(line)
示例2: sRNADetection
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import move_all_content [as 别名]
#.........这里部分代码省略.........
"_".join([self.prefixs["energy"], prefix])))
for ps in os.listdir(os.getcwd()):
new_ps = ps.replace("|", "_")
shutil.move(ps, new_ps)
return {"sec": tmp_sec_path, "dot": tmp_dot_path, "main": main_path,
"tmp": os.path.join(main_path, tmp_path)}
def _run_replot(self, vienna_util, tmp_paths, file_, dot_file, rel_file):
    """Run ``relplot.pl`` and store its standard output in ``rel_file``.

    Args:
        vienna_util: folder that contains ``relplot.pl``.
        tmp_paths: dict whose ``"tmp"`` entry is the working folder.
        file_: name of the first input file inside the working folder.
        dot_file: name of the second input file inside the working folder.
        rel_file: name of the output file created in the working folder.
    """
    work_dir = tmp_paths["tmp"]
    # An argument list with an explicit stdout handle replaces the former
    # shell string, so paths containing spaces or shell metacharacters
    # are passed through untouched.
    with open(os.path.join(work_dir, rel_file), "w") as out:
        call([os.path.join(vienna_util, "relplot.pl"),
              os.path.join(work_dir, file_),
              os.path.join(work_dir, dot_file)], stdout=out)
def _convert_pdf(self, ps2pdf14_path, tmp_paths, file_, pdf_file):
    """Invoke the ps2pdf14 executable on one file of the temporary folder."""
    ps_source = os.path.join(tmp_paths["tmp"], file_)
    command = [ps2pdf14_path, ps_source, pdf_file]
    call(command)
def _replot_sec_to_pdf(self, vienna_util, tmp_paths,
                       ps2pdf14_path, prefix):
    """Replot the ``ss.ps`` files, convert the results to PDF and file them.

    The ``rss.pdf`` files end up in ``tmp_paths["sec"]/prefix`` and the
    ``dp.pdf`` files in ``tmp_paths["dot"]/prefix``.
    """
    work_dir = tmp_paths["tmp"]
    # Step 1: replot every "*ss.ps" file found in the current directory.
    for ps_name in os.listdir(os.getcwd()):
        if not ps_name.endswith("ss.ps"):
            continue
        dot_name = ps_name.replace("ss.ps", "dp.ps")
        rel_name = ps_name.replace("ss.ps", "rss.ps")
        print("replot {0}".format(ps_name))
        self._run_replot(vienna_util, tmp_paths, ps_name,
                         dot_name, rel_name)
    # Step 2: convert the replotted and the dot-plot files to PDF.
    for ps_name in os.listdir(work_dir):
        if ps_name.endswith(("rss.ps", "dp.ps")):
            pdf_name = ps_name.replace(".ps", ".pdf")
            print("convert {0} to pdf".format(ps_name))
            self._convert_pdf(ps2pdf14_path, tmp_paths,
                              ps_name, pdf_name)
    # Step 3: create the per-prefix folders and move the PDFs into them.
    sec_dir = os.path.join(tmp_paths["sec"], prefix)
    os.mkdir(sec_dir)
    dot_dir = os.path.join(tmp_paths["dot"], prefix)
    os.mkdir(dot_dir)
    self.helper.move_all_content(work_dir, sec_dir, ["rss.pdf"])
    self.helper.move_all_content(work_dir, dot_dir, ["dp.pdf"])
def _run_mountain(self, vienna_util, tmp_paths, dot_file, out):
    """Run ``mountain.pl`` on ``dot_file`` and send its stdout to ``out``."""
    script = os.path.join(vienna_util, "mountain.pl")
    dot_path = os.path.join(tmp_paths["tmp"], dot_file)
    call([script, dot_path], stdout=out)
def _plot_mountain(self, mountain, moun_path,
                   tmp_paths, prefix, vienna_util):
    """Create one mountain-plot PDF per ``*dp.ps`` file in the temp folder.

    Nothing happens when ``mountain`` is falsy. Each PDF is moved to
    ``tmp_paths["main"]/moun_path/prefix``.
    """
    if not mountain:
        return
    tmp_moun_path = os.path.join(tmp_paths["main"], moun_path)
    os.mkdir(os.path.join(tmp_moun_path, prefix))
    txt_path = os.path.join(tmp_paths["tmp"], "tmp_txt")
    self.helper.check_make_folder(txt_path)
    print("Generating mountain plot of {0}....".format(prefix))
    for dot_file in os.listdir(tmp_paths["tmp"]):
        if not dot_file.endswith("dp.ps"):
            continue
        moun_txt = os.path.join(tmp_paths["tmp"], "mountain.txt")
        moun_file = dot_file.replace("dp.ps", "mountain.pdf")
        print("Generating {0}".format(moun_file))
        # Close the text file before it is read for plotting, and make
        # sure it is closed even if the external script fails.
        with open(moun_txt, "w") as out:
            self._run_mountain(vienna_util, tmp_paths, dot_file, out)
        plot_mountain_plot(moun_txt, moun_file)
        shutil.move(moun_file,
                    os.path.join(tmp_moun_path, prefix, moun_file))
        os.remove(moun_txt)
def _compute_2d_and_energy(self, args_srna, prefixs):
示例3: SNPCalling
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import move_all_content [as 别名]
class SNPCalling(object):
def __init__(self, args_snp):
    """Prepare helper objects and every path used during SNP calling."""
    self.multiparser = Multiparser()
    self.seq_editer = SeqEditer()
    self.helper = Helper()
    out_folder = args_snp.out_folder
    # Results are grouped by the kind of comparison that was requested.
    file_type = ("compare_reference" if args_snp.types == "reference"
                 else "validate_target")
    result_root = os.path.join(out_folder, file_type)
    self.seq_path = os.path.join(result_root, "seqs")
    self.stat_path = os.path.join(result_root, "statistics")
    self.fasta_path = os.path.join(args_snp.fastas, "tmp")
    self.outputs = {
        "table": os.path.join(result_root, "SNP_table"),
        "raw": os.path.join(result_root, "SNP_raw_outputs"),
        "tmp": os.path.join(out_folder, "tmp_bcf")}
    # Clear merged-read files left behind in the output folder.
    if "whole_reads.bam" in os.listdir(out_folder):
        self.helper.remove_all_content(out_folder, "whole_read", "file")
    self.bams = {
        "whole": os.path.join(out_folder, "whole_reads.bam"),
        "sort": os.path.join(out_folder, "whole_reads_sorted.bam")}
    self.header = os.path.join(out_folder, "header")
    self.baqs = {"with": "with_BAQ", "without": "without_BAQ",
                 "extend": "extend_BAQ"}
def _import_bam(self, bam_folder, bams):
num_bam = 0
for bam in os.listdir(bam_folder):
if bam.endswith(".bam"):
num_bam += 1
bams.append(os.path.join(bam_folder, bam))
return num_bam
def _transcript_snp(self, fasta, snp, out_table_prefix, type_,
                    prefix, bam_number, table_path, args_snp):
    """Run ``snp_detect`` for one genome and collect the generated figures.

    After detection, the ``.png`` files found in ``table_path`` are moved
    to the statistics folder.
    """
    baq = self.baqs[type_]
    seq_prefix = os.path.join(self.seq_path, baq, prefix, prefix)
    stat_name = "_".join(["stat", prefix, baq, "SNP.csv"])
    stat_file = os.path.join(self.stat_path, stat_name)
    snp_detect(fasta, snp, out_table_prefix, seq_prefix, bam_number,
               stat_file, args_snp)
    self.helper.move_all_content(table_path, self.stat_path, [".png"])
def _run_tools(self, fasta_file, out_bcf, out_raw_prefix, type_, args_snp):
    """Run ``samtools mpileup`` followed by ``bcftools call``.

    Args:
        fasta_file: reference FASTA given to mpileup.
        out_bcf: open file receiving the mpileup output.
        out_raw_prefix: prefix of the resulting VCF file name.
        type_: ``"with"``, ``"without"`` or ``"extend"``.
        args_snp: options object providing the tool paths and ``chrom``.

    Returns:
        Path of the VCF file.
    """
    # Extra mpileup flag for each supported mode.
    baq_flags = {"with": [], "without": ["-B"], "extend": ["-E"]}
    if type_ in baq_flags:
        mpileup = [args_snp.samtools_path, "mpileup", "-t", "DP"]
        mpileup += baq_flags[type_]
        mpileup += ["-ugf", fasta_file, self.bams["sort"], "--ignore-RG"]
        call(mpileup, stdout=out_bcf)
    out_vcf = "_".join([out_raw_prefix, self.baqs[type_] + ".vcf"])
    if args_snp.chrom in ("1", "2"):
        caller = [args_snp.bcftools_path, "call"]
        if args_snp.chrom == "1":
            # Only this setting passes an explicit ploidy.
            caller += ["--ploidy", args_snp.chrom]
        caller += [self.outputs["tmp"], "-vmO", "v", "-o", out_vcf]
        call(caller)
    return out_vcf
def _run_sub(self, args_snp, fasta_file, type_, file_prefixs, prefix,
             table_path, bam_number):
    """Call SNPs for one genome with one mode and post-process the VCF.

    Args:
        args_snp: options object forwarded to the sub-steps.
        fasta_file: reference FASTA file.
        type_: key of ``self.baqs`` selecting the mode.
        file_prefixs: dict with ``"raw_prefix"`` and ``"table_prefix"``.
        prefix: genome name used for output folders and files.
        table_path: folder forwarded to ``_transcript_snp``.
        bam_number: forwarded to ``_transcript_snp``.
    """
    # The context manager guarantees the temporary BCF handle is closed
    # even when one of the steps below raises.
    with open(self.outputs["tmp"], "w") as out_bcf:
        out_vcf = self._run_tools(fasta_file, out_bcf,
                                  file_prefixs["raw_prefix"], type_,
                                  args_snp)
        self.helper.check_make_folder(
            os.path.join(self.seq_path, self.baqs[type_], prefix))
        self._transcript_snp(
            fasta_file, out_vcf,
            "_".join([file_prefixs["table_prefix"], self.baqs[type_]]),
            type_, prefix, bam_number, table_path, args_snp)
def _run_program(self, fasta_file, file_prefixs, prefix, bam_number,
table_path, args_snp):
for index in args_snp.program:
if index == "1":
type_ = "with"
print("Running SNP calling with BAQ...")
elif index == "2":
type_ = "without"
print("Running SNP calling without BAQ...")
elif index == "3":
print("Running SNP calling extend BAQ...")
type_ = "extend"
else:
print("Error: No correct program, please assign 1, 2, 3")
sys.exit()
self._run_sub(args_snp, fasta_file, type_, file_prefixs, prefix,
#.........这里部分代码省略.........
示例4: RATT
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import move_all_content [as 别名]
class RATT(object):
def __init__(self, args_ratt):
    """Create helper objects and derive the working paths from the options."""
    self.multiparser = Multiparser()
    self.converter = Converter()
    self.format_fixer = FormatFixer()
    self.helper = Helper()
    embl_root = args_ratt.ref_embls
    self.gbk = os.path.join(embl_root, "gbk_tmp")
    self.gbk_tmp = os.path.join(self.gbk, "tmp")
    self.embl = os.path.join(embl_root, "embls")
    self.ratt_log = os.path.join(args_ratt.output_path, "ratt_log.txt")
    gff_out = args_ratt.gff_outfolder
    self.tmp_files = {"tar": os.path.join(args_ratt.tar_fastas, "tmp"),
                      "ref": os.path.join(args_ratt.ref_fastas, "tmp"),
                      "out_gff": os.path.join(gff_out, "tmp")}
    # One temporary file path per annotation format.
    for ext in ("gff", "ptt", "rnt"):
        self.tmp_files[ext] = os.path.join(gff_out, "tmp." + ext)
def _convert_to_pttrnt(self, gffs, files):
    """Convert each ``.gff`` entry of ``files`` into ``.ptt``/``.rnt`` files.

    A file is converted only when the helper finds a matching FASTA file
    in the temporary target folder.
    """
    for name in files:
        if not name.endswith(".gff"):
            continue
        gff_path = os.path.join(gffs, name)
        prefix = gff_path.split("/")[-1][:-4]
        stem = gff_path[:-3]  # keeps the trailing dot
        rnt = stem + "rnt"
        ptt = stem + "ptt"
        fasta = self.helper.get_correct_file(self.tmp_files["tar"],
                                             ".fa", prefix, None, None)
        if fasta:
            self.converter.convert_gff2rntptt(gff_path, fasta, ptt, rnt,
                                              None, None)
def _remove_files(self, args_ratt, out_gbk):
    """Publish the final annotation files and delete temporary data."""
    gff_out = args_ratt.gff_outfolder
    for suffix in (".gff", ".ptt", ".rnt"):
        self.helper.remove_all_content(gff_out, suffix, "file")
    self.helper.move_all_content(self.tmp_files["out_gff"], gff_out, None)
    for key in ("out_gff", "tar", "ref"):
        shutil.rmtree(self.tmp_files[key])
    shutil.rmtree(self.embl)
    for attr in ("tar_fastas", "ref_fastas"):
        self.helper.remove_all_content(getattr(args_ratt, attr),
                                       "_folder", "dir")
    # The GenBank folder is only removed when one was passed in.
    if out_gbk:
        shutil.rmtree(out_gbk)
def _convert_to_gff(self, ratt_result, args_ratt, files):
    """Convert one RATT EMBL result to GFF and register the new file name.

    The GFF name is ``ratt_result`` without its first and its last two
    dot-separated fields. The name is appended to ``files``.
    """
    stem = ".".join(ratt_result.split(".")[1:-2])
    gff_name = stem + ".gff"
    embl_result = os.path.join(args_ratt.output_path, ratt_result)
    output_file = os.path.join(args_ratt.output_path, gff_name)
    self.converter.convert_embl2gff(embl_result, output_file)
    # The fixer writes to a scratch file which then replaces the output.
    self.format_fixer.fix_ratt(output_file, stem, "tmp_gff")
    shutil.move("tmp_gff", output_file)
    published = os.path.join(args_ratt.gff_outfolder, gff_name)
    shutil.copy(output_file, published)
    files.append(gff_name)
def _parser_embl_gbk(self, files):
self.helper.check_make_folder(self.gbk)
for file_ in files:
close = False
with open(file_, "r") as f_h:
for line in f_h:
if (line.startswith("LOCUS")):
out = open(self.gbk_tmp, "w")
datas = line.split(" ")
for data in datas:
if (len(data) != 0) and (data != "LOCUS"):
filename = ".".join([data, "gbk"])
break
elif (line.startswith("VERSION")):
datas = line.split(" ")
for data in datas:
if (len(data) != 0) and (data != "VERSION"):
new_filename = ".".join([data, "gbk"])
break
if new_filename.find(filename):
filename = new_filename
if out:
out.write(line)
if line.startswith("//"):
out.close()
close = True
shutil.move(self.gbk_tmp,
os.path.join(self.gbk, filename))
if not close:
out.close()
return self.gbk
def _convert_embl(self, ref_embls):
detect_gbk = False
gbks = []
out_gbk = None
#.........这里部分代码省略.........
示例5: GoTermFinding
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import move_all_content [as 别名]
class GoTermFinding(object):
'''Retrieving the GO term'''
def __init__(self, args_go):
    """Create helper objects and derive every input/output path."""
    self.multiparser = Multiparser()
    self.helper = Helper()
    out_folder = args_go.out_folder
    self.out_all = os.path.join(out_folder, "all_CDSs")
    self.out_express = os.path.join(out_folder, "expressed_CDSs")
    self.result_all_path = os.path.join(self.out_all, "GO_term_results")
    self.result_express_path = os.path.join(self.out_express,
                                            "GO_term_results")
    self.gff_path = os.path.join(args_go.gffs, "tmp")
    # The transcript folder is optional.
    self.tran_path = (os.path.join(args_go.trans, "tmp")
                      if args_go.trans is not None else None)
    self.stat_all_path = os.path.join(self.out_all, "statistics")
    self.stat_express_path = os.path.join(self.out_express,
                                          "statistics")
    self.all_strain = "all_genomes_uniprot.csv"
def _retrieve_go(self, uniprot, out_path, type_, log):
    """Run ``retrieve_uniprot`` for every file in the temporary GFF folder.

    One ``<prefix>_uniprot.csv`` file is written per input into
    ``out_path/<prefix>``, and each generated file is reported in ``log``.
    """
    log.write("Running gene_ontology.py to retrieve GO terms.\n")
    for gff in os.listdir(self.gff_path):
        prefix = gff.replace(".gff", "")
        genome_dir = os.path.join(out_path, prefix)
        self.helper.check_make_folder(genome_dir)
        out_file = os.path.join(genome_dir, prefix + "_uniprot.csv")
        print("Extracting GO terms of {0} from UniProt".format(prefix))
        tran_file = None
        if self.tran_path is not None:
            tran_file = os.path.join(self.tran_path,
                                     prefix + "_transcript.gff")
        retrieve_uniprot(uniprot, os.path.join(self.gff_path, gff),
                         out_file, tran_file, type_)
        log.write("\t" + out_file + " is generated.\n")
def _remove_header(self, out_all):
out = open(out_all + "_tmp", "w")
fh = open(out_all, "r")
out.write("\t".join(["Genome", "Strand", "Start", "End",
"Protein_id", "Go_term"]) + "\n")
for row in csv.reader(fh, delimiter='\t'):
if row[0] != "Genome":
out.write("\t".join(row) + "\n")
out.close()
fh.close()
shutil.move(out_all + "_tmp", out_all)
# Merge the per-genome "<name>_uniprot.csv" files found under out_path into
# one folder per "*gff_folder" entry of gffs, then replace the contents of
# out_path with those folders and report every resulting file in log.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# statements below has to be confirmed against the upstream source.
def _merge_files(self, gffs, out_path, out_folder, log):
'''merge the files according to the input genome folder'''
# Staging folders created under out_folder; moved into out_path at the end.
folders = []
log.write("Merging the output files based on the input genome "
"information.\n")
for folder in os.listdir(gffs):
if folder.endswith("gff_folder"):
folder_prefix = folder.replace(".gff_folder", "")
folder_path = os.path.join(out_folder, folder_prefix)
self.helper.check_make_folder(folder_path)
folders.append(folder_path)
# Names (without ".gff") of the annotation files inside this input folder.
filenames = []
for gff in os.listdir(os.path.join(gffs, folder)):
if gff.endswith(".gff"):
filenames.append(gff.replace(".gff", ""))
out_all = os.path.join(folder_path, self.all_strain)
if len(filenames) > 1:
# Remove a combined table that is already present before rebuilding it.
if self.all_strain in os.listdir(folder_path):
os.remove(out_all)
for filename in filenames:
csv_file = "_".join([filename, "uniprot.csv"])
# presumably merge_file appends the first file to the second - TODO confirm
self.helper.merge_file(os.path.join(out_path,
filename, csv_file), out_all)
self._remove_header(out_all)
shutil.copy(os.path.join(out_path, filename, csv_file),
folder_path)
else:
# NOTE(review): filenames[0] raises IndexError when the folder holds no
# ".gff" file.
shutil.copyfile(os.path.join(out_path, filenames[0],
"_".join([filenames[0], "uniprot.csv"])),
out_all)
# Clear out_path through the helper before the staged folders are moved in.
self.helper.remove_all_content(out_path, None, "dir")
self.helper.remove_all_content(out_path, None, "file")
for folder in folders:
folder_prefix = folder.split("/")[-1]
shutil.move(folder, os.path.join(out_path, folder_prefix))
for file_ in os.listdir(os.path.join(out_path, folder_prefix)):
log.write("\t" + os.path.join(out_path, folder_prefix, file_) +
" is generated.\n")
def _stat(self, out_path, stat_path, go, goslim, out_folder, log):
log.write("Running gene_ontology.py to Retrieve GOslim terms and "
"do statistics.\n")
log.write("The following files are generated:\n")
for folder in os.listdir(out_path):
strain_stat_path = os.path.join(stat_path, folder)
self.helper.check_make_folder(strain_stat_path)
fig_path = os.path.join(strain_stat_path, "figs")
if "fig" not in os.listdir(strain_stat_path):
#.........这里部分代码省略.........
示例6: Multiparser
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import move_all_content [as 别名]
class Multiparser(object):
def __init__(self):
    """Create the helper objects and fix the temporary file names."""
    self.seq_editer = SeqEditer()
    self.helper = Helper()
    # Names of the scratch files used while merging.
    self.tmp_fa, self.tmp_gff = "tmp.fa", "tmp.gff"
    self.tmp_wig_forward, self.tmp_wig_reverse = (
        "tmp_forward.wig", "tmp_reverse.wig")
# Merge the FASTA files of tar_folder into one "<prefix>.fa" per "*_folder"
# entry of ref_folder. Merged files are staged in tar_folder/merge_tmp and
# moved back into tar_folder at the end.
# NOTE(review): indentation was lost in this listing; the nesting of the
# loops below has to be confirmed against the upstream source.
def combine_fasta(self, ref_folder, tar_folder, ref_feature):
tar_merge = os.path.join(tar_folder, "merge_tmp")
# Set when at least one target file was merged for the current folder.
change = False
# Normalise the feature to "" (none) or "_<feature>".
if ref_feature is None:
ref_feature = ""
else:
ref_feature = "_" + ref_feature
self.helper.check_make_folder(tar_merge)
for folder in os.listdir(ref_folder):
files = []
if "_folder" in folder:
# Derive the output prefix from the folder name.
# NOTE(review): this duplicates the logic of get_prefix().
datas = folder.split("_folder")
if ref_feature == "":
prefix = datas[0][:-4]
elif ref_feature == "_fasta":
# NOTE(review): prefix stays unassigned when none of the three
# extensions matches.
if datas[0].endswith(".fa"):
prefix = datas[0][:-3]
elif datas[0].endswith(".fna"):
prefix = datas[0][:-4]
elif datas[0].endswith(".fasta"):
prefix = datas[0][:-6]
else:
datas = datas[0][:-4]
datas = datas.split(ref_feature)
prefix = datas[0]
print("Merging fasta file of " + prefix)
# Collect the base names of the files inside this reference folder.
for file_ in os.listdir("/".join([ref_folder, folder])):
if ref_feature == "":
files.append(file_[:-4])
elif ref_feature == "_fasta":
files.append(file_[:-3])
else:
filename = file_.split(ref_feature)
files.append(filename[0])
# Merge every target FASTA whose name (without extension) is in files.
for tar in os.listdir(tar_folder):
if tar.endswith(".fa") or \
tar.endswith(".fna") or \
tar.endswith(".fasta"):
filename = ".".join((tar.split("."))[:-1])
for file_ in files:
if filename == file_:
# presumably merge_file appends the first file to the second - TODO confirm
self.helper.merge_file(
os.path.join(tar_folder, tar),
os.path.join(tar_folder, self.tmp_fa))
change = True
if change:
change = False
shutil.move(os.path.join(tar_folder, self.tmp_fa),
os.path.join(tar_merge, prefix + ".fa"))
# Swap the ".fa" files of tar_folder for the merged ones.
self.helper.remove_all_content(tar_folder, ".fa", "file")
self.helper.move_all_content(tar_merge, tar_folder, None)
shutil.rmtree(tar_merge)
def get_prefix(self, folder, ref_feature):
    """Return the genome prefix encoded in a ``*_folder`` directory name.

    Args:
        folder: directory name containing ``"_folder"``.
        ref_feature: ``""``, ``"_fasta"`` or ``"_<feature>"``.
    """
    stem = folder.split("_folder")[0]
    if ref_feature == "":
        return stem[:-4]
    if ref_feature == "_fasta":
        for ext in (".fa", ".fna", ".fasta"):
            if stem.endswith(ext):
                prefix = stem[:-len(ext)]
                break
        # As before, an unrecognised extension leaves prefix unassigned.
        return prefix
    return stem[:-4].split(ref_feature)[0]
def combine_wig(self, ref_folder, tar_folder, ref_feature, libs):
tar_merge = os.path.join(tar_folder, "merge_tmp")
change_f = False
change_r = False
if ref_feature is None:
ref_feature = ""
else:
ref_feature = "_" + ref_feature
self.helper.check_make_folder(tar_merge)
for folder in os.listdir(ref_folder):
files = []
if "_folder" in folder:
prefix = self.get_prefix(folder, ref_feature)
print("Merging wig file of " + prefix)
for file_ in os.listdir(os.path.join(ref_folder, folder)):
if ref_feature == "":
files.append(file_[:-4])
elif ref_feature == "_fasta":
files.append(file_[:-3])
else:
#.........这里部分代码省略.........
示例7: RATT
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import move_all_content [as 别名]
class RATT(object):
'''annotation transfer'''
def __init__(self, args_ratt):
    """Create helper objects and derive the working paths from the options."""
    self.multiparser = Multiparser()
    self.converter = Converter()
    self.format_fixer = FormatFixer()
    self.helper = Helper()
    gbk_root = args_ratt.ref_gbk
    if gbk_root:
        self.gbk = os.path.join(gbk_root, "gbk_tmp")
        self.gbk_tmp = os.path.join(self.gbk, "tmp")
        self.embl = os.path.join(gbk_root, "embls")
    # An explicit EMBL folder takes precedence over the derived one.
    if args_ratt.ref_embls:
        self.embl = args_ratt.ref_embls
    self.ratt_log = os.path.join(args_ratt.output_path, "ratt_log.txt")
    gff_out = args_ratt.gff_outfolder
    self.tmp_files = {"tar": os.path.join(args_ratt.tar_fastas, "tmp"),
                      "ref": os.path.join(args_ratt.ref_fastas, "tmp"),
                      "out_gff": os.path.join(gff_out, "tmp")}
    # One temporary file path per annotation format.
    for ext in ("gff", "ptt", "rnt"):
        self.tmp_files[ext] = os.path.join(gff_out, "tmp." + ext)
def _convert_to_pttrnt(self, gffs, files, log):
    """Convert each ``.gff`` entry of ``files`` into ``.ptt``/``.rnt`` files.

    A file is converted, and reported in ``log``, only when the helper
    finds a matching FASTA file in the temporary target folder.
    """
    for name in files:
        if not name.endswith(".gff"):
            continue
        gff_path = os.path.join(gffs, name)
        prefix = gff_path.split("/")[-1][:-4]
        stem = gff_path[:-3]  # keeps the trailing dot
        rnt = stem + "rnt"
        ptt = stem + "ptt"
        fasta = self.helper.get_correct_file(self.tmp_files["tar"],
                                             ".fa", prefix, None, None)
        if fasta:
            self.converter.convert_gff2rntptt(gff_path, fasta, ptt, rnt,
                                              None, None)
            for generated in (ptt, rnt):
                log.write("\t" + generated + " is generated.\n")
def _remove_files(self, args_ratt, out_gbk, log):
    """Publish the final annotation files and delete temporary data.

    ``out_gbk`` is accepted but not used by this method.
    """
    gff_out = args_ratt.gff_outfolder
    for suffix in (".gff", ".ptt", ".rnt"):
        self.helper.remove_all_content(gff_out, suffix, "file")
    log.write("Moving the final output files to {0}.\n".format(gff_out))
    self.helper.move_all_content(self.tmp_files["out_gff"], gff_out, None)
    log.write("Remove the temperary files.\n")
    for key in ("out_gff", "tar", "ref"):
        shutil.rmtree(self.tmp_files[key])
    for attr in ("tar_fastas", "ref_fastas", "ref_embls", "ref_gbk"):
        self.helper.remove_tmp_dir(getattr(args_ratt, attr))
def _convert_to_gff(self, ratt_result, args_ratt, files, log):
    """Convert one RATT EMBL result to GFF and register the new file name.

    The GFF name is ``ratt_result`` without its first and its last two
    dot-separated fields. The name is appended to ``files`` and the
    published copy is reported in ``log``.
    """
    stem = ".".join(ratt_result.split(".")[1:-2])
    gff_name = stem + ".gff"
    embl_result = os.path.join(args_ratt.output_path, ratt_result)
    output_file = os.path.join(args_ratt.output_path, gff_name)
    self.converter.convert_embl2gff(embl_result, output_file)
    # The fixer writes to a scratch file which then replaces the output.
    self.format_fixer.fix_ratt(output_file, stem, "tmp_gff")
    shutil.move("tmp_gff", output_file)
    shutil.copy(output_file, os.path.join(args_ratt.gff_outfolder,
                                          gff_name))
    log.write("\t" + os.path.join(args_ratt.gff_outfolder, gff_name) +
              " is generated.\n")
    files.append(gff_name)
def _parser_embl_gbk(self, files):
self.helper.check_make_folder(self.gbk)
for file_ in files:
close = False
with open(file_, "r") as f_h:
for line in f_h:
if (line.startswith("LOCUS")):
out = open(self.gbk_tmp, "w")
datas = line.split(" ")
for data in datas:
if (len(data) != 0) and (data != "LOCUS"):
filename = ".".join([data.strip(), "gbk"])
break
elif (line.startswith("VERSION")):
datas = line.split(" ")
for data in datas:
if (len(data) != 0) and (data != "VERSION"):
new_filename = ".".join([data.strip(), "gbk"])
break
if new_filename.find(filename):
filename = new_filename
if out:
out.write(line)
if line.startswith("//"):
out.close()
close = True
shutil.move(self.gbk_tmp,
#.........这里部分代码省略.........
示例8: CircRNADetection
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import move_all_content [as 别名]
#.........这里部分代码省略.........
def _run_samtools_convert_sam(self, samtools_path, sub_alignment_path):
    """Convert the sorted BAM file into a SAM file with ``samtools view``."""
    print("Convert whole reads bam file to sam file....")
    sam_path = os.path.join(sub_alignment_path, self.bams["sort"] + ".sam")
    bam_path = os.path.join(sub_alignment_path, self.bams["sort"] + ".bam")
    command = [samtools_path, "view", "-h", "-o", sam_path, bam_path]
    call(command)
def _merge_sort_aligment_file(self, bam_files, samtools_path,
                              sub_alignment_path, convert_ones,
                              tmp_reads, remove_ones):
    """Merge, sort and convert the alignments, then delete leftovers.

    Every path listed in ``convert_ones``, ``remove_ones`` and
    ``tmp_reads`` is removed afterwards, in that order.
    """
    self._run_samtools_merge_sort(samtools_path,
                                  sub_alignment_path, bam_files)
    self._run_samtools_convert_sam(samtools_path, sub_alignment_path)
    for leftovers in (convert_ones, remove_ones, tmp_reads):
        for path in leftovers:
            os.remove(path)
def _run_testrealign(self, prefix, segemehl_path, sub_alignment_path):
    """Run ``testrealign.x`` for one genome and collect its BED output.

    The tool's stderr is stored in ``<splice_path>/<prefix>/<prefix>.log``.
    Afterwards the ``.bed`` files found in the current working directory
    are moved to the splice folder and the sorted alignment files are
    removed through the helper.

    Args:
        prefix: genome name.
        segemehl_path: folder that contains ``testrealign.x``.
        sub_alignment_path: folder that contains the sorted SAM file.
    """
    sub_splice_path = os.path.join(self.splice_path, prefix)
    self.helper.check_make_folder(sub_splice_path)
    err_log = os.path.join(sub_splice_path, prefix + ".log")
    print("Running testrealign.x for {0}".format(prefix))
    # An argument list with an explicit stderr handle replaces the former
    # shell string, so paths containing spaces or shell metacharacters
    # are passed through untouched.
    with open(err_log, "w") as log_fh:
        call([os.path.join(segemehl_path, "testrealign.x"),
              "-d", os.path.join(self.fasta_path, prefix + ".fa"),
              "-q", os.path.join(sub_alignment_path,
                                 self.bams["sort"] + ".sam"),
              "-n"], stderr=log_fh)
    self.helper.move_all_content(os.getcwd(), sub_splice_path, [".bed"])
    self.helper.remove_all_content(sub_alignment_path,
                                   self.bams["sort"], "file")
def _merge_bed(self, fastas, splice_path):
tmp_prefixs = []
for fasta in os.listdir(fastas):
headers = []
if (fasta.endswith(".fa") or fasta.endswith(".fna") or
fasta.endswith(".fasta")):
with open(os.path.join(fastas, fasta), "r") as f_h:
for line in f_h:
line = line.strip()
if line.startswith(">"):
headers.append(line[1:])
filename = fasta.split(".")
fasta_prefix = ".".join(filename[:-1])
tmp_prefixs.append(fasta_prefix)
self.helper.check_make_folder(os.path.join(
os.getcwd(), fasta_prefix))
for header in headers:
shutil.copyfile(os.path.join(splice_path, header,
self.splices["file"]),
os.path.join(fasta_prefix,
"_".join([self.splices["splice"],
header + ".bed"])))
shutil.copyfile(os.path.join(splice_path, header,
self.trans["file"]),
os.path.join(fasta_prefix,
"_".join([self.trans["trans"],
header + ".bed"])))
out_splice = os.path.join(fasta_prefix,
self.splices["all_file"])
示例9: TSSpredator
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import move_all_content [as 别名]
#.........这里部分代码省略.........
feature = "processing_site"
elif args_tss.program.lower() == "tss":
feature = "TSS"
self.converter.convert_mastertable2gff(
os.path.join(out_path, "MasterTable.tsv"),
"ANNOgesic", feature, prefix, out_file)
log.write("\t" + out_file + "is generated.\n")
gff_f.close()
# For every genome name in tsss, merge a manually curated TSS file (when one
# exists under self.manual_path) with the predicted TSS file. Merged GFFs are
# staged in self.tmps["tss"] and moved to self.gff_outfolder at the end.
# NOTE(review): indentation was lost in this listing; whether the second
# os.path.exists() check is nested inside the first has to be confirmed.
def _merge_manual(self, tsss, args_tss):
'''if manual detected TSS is provided, it can merge manual detected TSS
and TSSpredator predicted TSS'''
self.helper.check_make_folder(os.path.join(os.getcwd(),
self.tmps["tss"]))
for tss in tsss:
# Find the annotation file named "<tss>.gff".
# NOTE(review): without a match, gff keeps the last directory entry (or is
# unassigned for an empty folder) and is still used below.
for gff in os.listdir(args_tss.gffs):
if (gff[:-4] == tss) and (".gff" in gff):
break
filename = "_".join([tss, args_tss.program]) + ".gff"
predict = os.path.join(self.gff_outfolder, filename)
manual = os.path.join(self.manual_path, tss + ".gff")
fasta = os.path.join(self.fasta_path, tss + ".fa")
# Relative name, i.e. the statistics file is looked up in the current
# working directory.
stat_file = "stat_compare_TSSpredator_manual_{0}.csv".format(tss)
if os.path.exists(manual):
print("Merging and classiflying manually-detected "
"TSSs for {0}".format(tss))
merge_manual_predict_tss(
predict, stat_file,
os.path.join(self.tmps["tss"], filename),
os.path.join(args_tss.gffs, gff), args_tss, manual, fasta)
if os.path.exists(stat_file):
shutil.move(stat_file, os.path.join(
args_tss.out_folder, "statistics", tss, stat_file))
# Publish the staged GFF files and drop the staging folder.
self.helper.move_all_content(self.tmps["tss"],
self.gff_outfolder, [".gff"])
shutil.rmtree(self.tmps["tss"])
def _validate(self, tsss, args_tss, log):
    """Validate the predicted sites of every genome against its annotation.

    The GFF written by ``validate_gff`` replaces the annotation file, and
    each statistics file is reported in ``log``.
    """
    print("Validating TSSs with genome annotations")
    log.write("Running validate_gene.py to compare genome "
              "annotations and TSSs/PSs.\n")
    # File-name suffix of the prediction for each supported program.
    suffixes = {"tss": "TSS.gff", "processing": "processing.gff"}
    for tss in tsss:
        # Locate "<tss>.gff"; the loop variable is reused after the loop.
        for gff in os.listdir(args_tss.gffs):
            if (".gff" in gff) and (gff[:-4] == tss):
                break
        stat_file = os.path.join(self.stat_outfolder, tss,
                                 "stat_gene_vali_" + tss + ".csv")
        out_cds_file = os.path.join(args_tss.out_folder, "tmp.gff")
        program = args_tss.program.lower()
        if program in suffixes:
            compare_file = os.path.join(self.gff_outfolder,
                                        tss + "_" + suffixes[program])
        annotation = os.path.join(args_tss.gffs, gff)
        validate_gff(compare_file, annotation, stat_file, out_cds_file,
                     args_tss.utr_length, program)
        log.write("\t" + stat_file + " is generated.\n")
        shutil.move(out_cds_file, annotation)
def _compare_ta(self, tsss, args_tss, log):
'''compare TSS with transcript'''
detect = False
log.write("Running stat_TA_comparison to compare transcripts "
示例10: UTRDetection
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import move_all_content [as 别名]
class UTRDetection(object):
    """Detect 5'UTRs and 3'UTRs from TSS, transcript and annotation files."""

    def __init__(self, args_utr):
        self.helper = Helper()
        self.multiparser = Multiparser()
        self.tss_path = os.path.join(args_utr.tsss, "tmp")
        self.tran_path = os.path.join(args_utr.trans, "tmp")
        self.utr5_path = os.path.join(args_utr.out_folder, "5UTR")
        self.utr3_path = os.path.join(args_utr.out_folder, "3UTR")
        self.utr5_stat_path = os.path.join(self.utr5_path, "statistics")
        self.utr3_stat_path = os.path.join(self.utr3_path, "statistics")

    def _check_folder(self, folder):
        """Exit the program when a required input folder was not given."""
        if folder is None:
            print("Error: lack required files!!!")
            sys.exit()

    def _check_gff(self, folder):
        """Run the helper's attribute check on every ``.gff`` in ``folder``.

        ``folder`` may be ``None`` for an optional input. Nothing is checked
        then; previously ``os.listdir(None)`` scanned the current working
        directory instead.
        """
        if folder is None:
            return
        for gff in os.listdir(folder):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(folder, gff))

    def _compute_utr(self, args_utr):
        """Detect 5' and 3' UTRs for every annotation file of the input."""
        for gff in os.listdir(args_utr.gffs):
            if not gff.endswith(".gff"):
                continue
            prefix = gff[:-4]
            tss = self.helper.get_correct_file(
                self.tss_path, "_TSS.gff", prefix, None, None)
            tran = self.helper.get_correct_file(
                self.tran_path, "_transcript.gff", prefix, None, None)
            # Terminators are optional.
            if args_utr.terms:
                term = self.helper.get_correct_file(
                    os.path.join(args_utr.terms, "tmp"),
                    "_term.gff", prefix, None, None)
            else:
                term = None
            print("computing 5'UTR of {0} .....".format(prefix))
            detect_5utr(tss, os.path.join(args_utr.gffs, gff),
                        tran, os.path.join(self.utr5_path, "gffs",
                        "_".join([prefix, "5UTR.gff"])), args_utr)
            print("computing 3'UTR of {0} .....".format(prefix))
            detect_3utr(tran, os.path.join(args_utr.gffs, gff),
                        term, os.path.join(self.utr3_path, "gffs",
                        "_".join([prefix, "3UTR.gff"])), args_utr)
            # Collect the length plots from the working directory.
            self.helper.move_all_content(
                os.getcwd(), self.utr5_stat_path, ["_5utr_length.png"])
            self.helper.move_all_content(
                os.getcwd(), self.utr3_stat_path, ["_3utr_length.png"])

    def run_utr_detection(self, args_utr):
        """Validate the inputs, detect the UTRs and clean up temporary data."""
        self._check_folder(args_utr.tsss)
        self._check_folder(args_utr.gffs)
        self._check_folder(args_utr.trans)
        self._check_gff(args_utr.tsss)
        self._check_gff(args_utr.gffs)
        self._check_gff(args_utr.trans)
        self._check_gff(args_utr.terms)
        self.multiparser.parser_gff(args_utr.gffs, None)
        self.multiparser.parser_gff(args_utr.tsss, "TSS")
        self.multiparser.combine_gff(args_utr.gffs, self.tss_path, None, "TSS")
        self.multiparser.parser_gff(args_utr.trans, "transcript")
        self.multiparser.combine_gff(args_utr.gffs, self.tran_path,
                                     None, "transcript")
        if args_utr.terms:
            self.multiparser.parser_gff(args_utr.terms, "term")
            self.multiparser.combine_gff(args_utr.gffs,
                                         os.path.join(args_utr.terms, "tmp"),
                                         None, "term")
        self._compute_utr(args_utr)
        self.helper.remove_tmp(args_utr.gffs)
        self.helper.remove_tmp(args_utr.tsss)
        self.helper.remove_tmp(args_utr.trans)
        self.helper.remove_tmp(args_utr.terms)
        self.helper.remove_tmp(self.utr5_path)
        self.helper.remove_tmp(self.utr3_path)
示例11: TSSpredator
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import move_all_content [as 别名]
#.........这里部分代码省略.........
for prefix in prefixs:
out_file = os.path.join(self.gff_outfolder, "_".join([
prefix, args_tss.program]) + ".gff")
gff_f = open(out_file, "w")
out_path = os.path.join(self.master, "_".join([
"MasterTable", prefix]))
if "MasterTable.tsv" not in os.listdir(out_path):
print("Error:there is not MasterTable file in {0}".format(
out_path))
print("Please check configuration file.")
else:
self.converter.convert_mastertable2gff(
os.path.join(out_path, "MasterTable.tsv"),
"ANNOgesic", args_tss.program, prefix, out_file)
gff_f.close()
def _merge_manual(self, tsss, args_tss):
    """Merge manual and predicted TSSs for every genome name in ``tsss``.

    Merged GFF files are staged in ``self.tmps["tss"]`` and moved to
    ``self.gff_outfolder`` at the end. Each statistics file is moved to
    ``<out_folder>/statistics/<genome>``.
    """
    staging = self.tmps["tss"]
    self.helper.check_make_folder(os.path.join(os.getcwd(), staging))
    for tss in tsss:
        # Locate "<tss>.gff"; the loop variable is reused after the loop.
        for gff in os.listdir(args_tss.gffs):
            if (".gff" in gff) and (gff[:-4] == tss):
                break
        filename = "_".join([tss, args_tss.program]) + ".gff"
        predict = os.path.join(self.gff_outfolder, filename)
        print("Running merge and classify manual ....")
        stat_file = "stat_compare_TSSpredator_manual_{0}.csv".format(tss)
        merged_gff = os.path.join(self.tmps["tss"], filename)
        annotation = os.path.join(args_tss.gffs, gff)
        merge_manual_predict_tss(predict, stat_file, merged_gff,
                                 annotation, args_tss)
        stat_target = os.path.join(args_tss.out_folder, "statistics",
                                   tss, stat_file)
        shutil.move(stat_file, stat_target)
    self.helper.move_all_content(self.tmps["tss"],
                                 self.gff_outfolder, [".gff"])
    shutil.rmtree(self.tmps["tss"])
def _validate(self, tsss, args_tss):
    """Validate the predicted sites of every genome against its annotation.

    The GFF written by ``validate_gff`` replaces the annotation file.
    """
    print("Running validation of annotation....")
    # File-name suffix of the prediction for each supported program.
    suffixes = {"tss": "TSS.gff", "processing": "processing.gff"}
    for tss in tsss:
        # Locate "<tss>.gff"; the loop variable is reused after the loop.
        for gff in os.listdir(args_tss.gffs):
            if (".gff" in gff) and (gff[:-4] == tss):
                break
        stat_file = os.path.join(self.stat_outfolder, tss,
                                 "stat_gene_vali_" + tss + ".csv")
        out_cds_file = os.path.join(args_tss.out_folder, "tmp.gff")
        program = args_tss.program.lower()
        if program in suffixes:
            compare_file = os.path.join(self.gff_outfolder,
                                        tss + "_" + suffixes[program])
        annotation = os.path.join(args_tss.gffs, gff)
        validate_gff(compare_file, annotation, stat_file, out_cds_file,
                     args_tss.utr_length, program)
        shutil.move(out_cds_file, annotation)
def _compare_ta(self, tsss, args_tss):
detect = False
print("Running compare transcript assembly and TSS ...")
self.multiparser.parser_gff(args_tss.ta_files, "transcript")
self.multiparser.combine_gff(args_tss.gffs, self.tmps["ta"],
None, "transcript")
for tss in tsss:
stat_out = os.path.join(
示例12: GoTermFinding
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import move_all_content [as 别名]
class GoTermFinding(object):
def __init__(self, args_go):
    """Create the helper objects and derive every working path.

    Reads ``out_folder``, ``gffs`` and ``trans`` from ``args_go``;
    ``self.tran_path`` is None when no transcript folder was given.
    """
    self.multiparser = Multiparser()
    self.helper = Helper()
    root = args_go.out_folder
    # Two parallel output trees: all CDSs and expressed CDSs only.
    self.out_all = os.path.join(root, "all_CDS")
    self.out_express = os.path.join(root, "expressed_CDS")
    self.result_all_path = os.path.join(self.out_all, "Go_term_results")
    self.result_express_path = os.path.join(self.out_express,
                                            "Go_term_results")
    self.gff_path = os.path.join(args_go.gffs, "tmp")
    self.tran_path = (None if args_go.trans is None
                      else os.path.join(args_go.trans, "tmp"))
    self.stat_all_path = os.path.join(self.out_all, "statistics")
    self.stat_express_path = os.path.join(self.out_express, "statistics")
    self.all_strain = "all_strains_uniprot.csv"
def _retrieve_go(self, uniprot, out_path, type_):
    """Call retrieve_uniprot once for every entry of ``self.gff_path``.

    For each entry the prefix is its name with ".gff" removed; the result
    goes to ``<out_path>/<prefix>/<prefix>_uniprot.csv`` (the folder is
    prepared with ``check_make_folder`` first). When ``self.tran_path`` is
    set, ``<prefix>_transcript.gff`` from that folder is passed along,
    otherwise None.

    Args:
        uniprot: forwarded unchanged to ``retrieve_uniprot``.
        out_path: parent folder of the per-prefix output folders.
        type_: forwarded unchanged to ``retrieve_uniprot``.
    """
    for gff in os.listdir(self.gff_path):
        prefix = gff.replace(".gff", "")
        self.helper.check_make_folder(os.path.join(out_path, prefix))
        out_file = os.path.join(out_path, prefix,
                                "_".join([prefix, "uniprot.csv"]))
        print("extracting Go terms of {0} from UniProt...".format(prefix))
        tran_file = None
        if self.tran_path is not None:
            tran_file = os.path.join(
                self.tran_path, "_".join([prefix, "transcript.gff"]))
        retrieve_uniprot(uniprot, os.path.join(self.gff_path, gff),
                         out_file, tran_file, type_)
def _merge_files(self, gffs, out_path, out_folder):
    """Regroup the per-strain UniProt tables by their source GFF folder.

    Every ``*gff_folder`` entry of ``gffs`` gets a folder below
    ``out_folder``. With more than one ``.gff`` inside, the per-strain CSVs
    from ``out_path`` are merged into ``self.all_strain`` and also copied
    individually; otherwise the single CSV is copied as
    ``self.all_strain``. Finally ``out_path`` is emptied and the new
    folders are moved into it.
    """
    grouped_dirs = []
    for entry in os.listdir(gffs):
        if not entry.endswith("gff_folder"):
            continue
        group_dir = os.path.join(out_folder,
                                 entry.replace(".gff_folder", ""))
        self.helper.check_make_folder(group_dir)
        grouped_dirs.append(group_dir)
        strains = [name.replace(".gff", "")
                   for name in os.listdir(os.path.join(gffs, entry))
                   if name.endswith(".gff")]
        combined = os.path.join(group_dir, self.all_strain)
        if len(strains) <= 1:
            single = strains[0]
            shutil.copyfile(
                os.path.join(out_path, single,
                             "_".join([single, "uniprot.csv"])),
                combined)
            continue
        # Start from a clean combined table before appending to it.
        if self.all_strain in os.listdir(group_dir):
            os.remove(combined)
        for strain in strains:
            table = "_".join([strain, "uniprot.csv"])
            self.helper.merge_file(os.path.join(out_path, strain, table),
                                   combined)
            shutil.copy(os.path.join(out_path, strain, table), group_dir)
    self.helper.remove_all_content(out_path, None, "dir")
    self.helper.remove_all_content(out_path, None, "file")
    for group_dir in grouped_dirs:
        shutil.move(group_dir,
                    os.path.join(out_path, group_dir.split("/")[-1]))
def _stat(self, out_path, stat_path, go, goslim, out_folder):
    """Run map2goslim for every entry of ``out_path`` and collect figures.

    For each entry a statistics folder ``<stat_path>/<entry>`` with a
    ``figs`` subfolder is prepared, ``map2goslim`` is called on the
    entry's ``self.all_strain`` table, and the four PNG groups found in
    ``out_folder`` are moved into the ``figs`` subfolder.

    Args:
        out_path: folder holding one subfolder per strain group.
        stat_path: parent folder for the per-group statistics.
        go: forwarded to ``map2goslim``.
        goslim: forwarded to ``map2goslim``.
        out_folder: forwarded to ``map2goslim``; also the folder the PNG
            files are moved from.
    """
    figure_suffixes = ("_three_roots.png", "_molecular_function.png",
                       "_cellular_component.png", "_biological_process.png")
    for folder in os.listdir(out_path):
        strain_stat_path = os.path.join(stat_path, folder)
        self.helper.check_make_folder(strain_stat_path)
        fig_path = os.path.join(strain_stat_path, "figs")
        # Test the directory that is actually created ("figs"); the old
        # check looked for an entry named "fig" instead.
        if not os.path.isdir(fig_path):
            os.mkdir(fig_path)
        print("Computing statistics of {0}".format(folder))
        map2goslim(goslim, go,
                   os.path.join(out_path, folder, self.all_strain),
                   os.path.join(strain_stat_path,
                                "_".join(["stat", folder + ".csv"])),
                   out_folder)
        # One call per suffix, in the original order.
        for suffix in figure_suffixes:
            self.helper.move_all_content(out_folder, fig_path, [suffix])
def run_go_term(self, args_go):
for gff in os.listdir(args_go.gffs):
if gff.endswith(".gff"):
self.helper.check_uni_attributes(os.path.join(
args_go.gffs, gff))
self.multiparser.parser_gff(args_go.gffs, None)
if args_go.trans is not None:
self.multiparser.parser_gff(args_go.trans, "transcript")
print("Computing all CDS...")
#.........这里部分代码省略.........
示例13: SNPCalling
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import move_all_content [as 别名]
class SNPCalling(object):
'''detection of SNP'''
def __init__(self, args_snp):
    """Create the helper objects and derive all output paths.

    The result subfolder name depends on ``args_snp.types``; the figure
    folder is prepared immediately through ``check_make_folder``.
    """
    self.multiparser = Multiparser()
    self.seq_editer = SeqEditer()
    self.helper = Helper()
    file_type = ("compare_related_and_reference_genomes"
                 if args_snp.types == "related_genome"
                 else "mutations_of_reference_genomes")
    out_folder = args_snp.out_folder
    result_root = os.path.join(out_folder, file_type)
    self.seq_path = os.path.join(result_root, "seqs")
    self.stat_path = os.path.join(result_root, "statistics")
    self.fig_path = os.path.join(self.stat_path, "figs")
    self.helper.check_make_folder(self.fig_path)
    self.outputs = {
        "table": os.path.join(result_root, "SNP_tables"),
        "raw": os.path.join(result_root, "SNP_raw_outputs"),
        "tmp": os.path.join(out_folder, "tmp_bcf"),
        "depth": os.path.join(out_folder, "tmp_depth"),
    }
    self.bams = {
        "whole": os.path.join(out_folder, "whole_reads.bam"),
        "sort": os.path.join(out_folder, "whole_reads_sorted.bam"),
        "bams": [],
    }
    self.header = os.path.join(out_folder, "header")
    # Maps the short BAQ mode key to the label used in file/folder names.
    self.baqs = {"with": "with_BAQ", "without": "without_BAQ",
                 "extend": "extend_BAQ"}
# Run snp_detect once for every entry of bam_datas.
#
# type_ must be a key of self.baqs; its label is used in the sequence
# folder, the statistics prefix and the VCF file name. Each bam entry is
# read through the keys "sample", "bam_number" and "rep".
# PNG files found in table_path are moved to self.fig_path.
# NOTE(review): the indentation of this listing was lost, so it cannot be
# told from here whether the final move_all_content call runs once per
# sample or once after the loop -- confirm against the upstream source.
def _transcript_snp(self, fasta, out_table_prefix, type_,
prefix, bam_datas, table_path, args_snp):
seq_path = os.path.join(self.seq_path, self.baqs[type_], prefix)
for bam in bam_datas:
# Prefix of the per-sample statistics output below self.stat_path.
stat_prefix = os.path.join(self.stat_path, "_".join([
"stat", "_".join([prefix, self.baqs[type_], bam["sample"]]),
"SNP"]))
# Per-sample VCF path below the raw output folder of this prefix.
snp_file = os.path.join(self.outputs["raw"], prefix, "_".join(
[prefix, self.baqs[type_], bam["sample"] + ".vcf"]))
# The depth path is built by plain string concatenation (no path
# separator between self.outputs["depth"] and the sample name).
snp_detect(
fasta, snp_file, self.outputs["depth"] + bam["sample"],
"_".join([out_table_prefix, bam["sample"]]),
os.path.join(seq_path, "_".join([prefix, bam["sample"]])),
bam["bam_number"], stat_prefix, args_snp, bam["rep"])
self.helper.move_all_content(table_path, self.fig_path, [".png"])
def _get_para(self, args_snp):
if args_snp.caller == "c":
bcf_para = "-vcO"
else:
bcf_para = "-vmO"
return bcf_para
def _run_tools(self, fasta_file, type_, args_snp, bam_datas, log):
    """Run samtools mpileup and bcftools call for every sample.

    For each entry of ``bam_datas`` the mpileup output of
    ``<out_folder>/<sample>.bam`` is written to ``self.outputs["tmp"]``
    and then passed to ``bcftools call``. The resulting VCF path is stored
    in the entry under the key "vcf". Every command is echoed to ``log``.

    Args:
        fasta_file: reference fasta handed to mpileup via ``-ugf``.
        type_: "with", "without" or "extend"; selects no extra flag,
            ``-B`` or ``-E`` respectively.
        args_snp: argument container; ``samtools_path``,
            ``bcftools_path``, ``out_folder``, ``rg``, ``chrom`` and
            ``caller`` are read.
        bam_datas: list of dicts with at least the key "sample"; each dict
            is updated in place with "vcf".
        log: writable text handle.

    Raises:
        ValueError: if ``type_`` is not one of the three known modes.
    """
    baq_flags = {"with": [], "without": ["-B"], "extend": ["-E"]}
    if type_ not in baq_flags:
        # Previously this surfaced as an unbound ``command`` variable.
        raise ValueError("Unknown BAQ type: {0}".format(type_))
    bcf_para = self._get_para(args_snp)
    for bam in bam_datas:
        bam_file = os.path.join(args_snp.out_folder,
                                bam["sample"] + ".bam")
        command = ([args_snp.samtools_path, "mpileup", "-t", "DP"] +
                   baq_flags[type_])
        if not args_snp.rg:
            command.append("--ignore-RG")
        command += ["-ugf", fasta_file, bam_file]
        log.write(" ".join(command) + ">" + self.outputs["tmp"] + "\n")
        # Run without a shell and redirect stdout ourselves, so paths
        # containing spaces or shell metacharacters are passed verbatim.
        with open(self.outputs["tmp"], "wb") as pileup_out:
            call(command, stdout=pileup_out)
        bam["vcf"] = os.path.join(self.outputs["raw"], "_".join(
            [self.baqs[type_], bam["sample"] + ".vcf"]))
        # Only chrom "1" (explicit --ploidy) and "2" (bcftools default)
        # trigger a bcftools run; any other value is skipped as before.
        if args_snp.chrom == "1":
            ploidy = ["--ploidy", args_snp.chrom]
        elif args_snp.chrom == "2":
            ploidy = []
        else:
            continue
        bcf_command = ([args_snp.bcftools_path, "call"] + ploidy +
                       [self.outputs["tmp"], bcf_para, "v",
                        "-o", bam["vcf"]])
        log.write(" ".join(bcf_command) + "\n")
        call(bcf_command)
    log.write("Done!\n")
    log.write("The following files are generated:\n")
    for file_ in os.listdir(self.outputs["raw"]):
        log.write("\t" + os.path.join(self.outputs["raw"], file_) + "\n")
def _parse_vcf_by_fa(self, args_snp, type_, num_prog, log):
seq_names = []
fa_prefixs = []
log.write("Parsing Vcf files by comparing fasta information.\n")
for fa in os.listdir(args_snp.fastas):
if (fa != "all.fa") and (not fa.endswith(".fai")):
with open(os.path.join(args_snp.fastas, fa)) as fh:
for line in fh:
line = line.strip()
if line.startswith(">"):
seq_names.append(line[1:])
#.........这里部分代码省略.........