本文整理汇总了Python中annogesiclib.multiparser.Multiparser.combine_gff方法的典型用法代码示例。如果您正苦于以下问题：Python Multiparser.combine_gff方法的具体用法？Python Multiparser.combine_gff怎么用？Python Multiparser.combine_gff使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类annogesiclib.multiparser.Multiparser的用法示例。
在下文中一共展示了Multiparser.combine_gff方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: sORFDetection
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_gff [as 别名]
class sORFDetection(object):
'''detection of sORF'''
def __init__(self, args_sorf):
    '''Create the helper objects and derive the working paths.

    args_sorf supplies the input folders (tsss, srnas, trans, fastas)
    and out_folder.  The TSS and sRNA folders are optional: when one of
    them is None the matching path attribute is None as well.
    '''
    self.multiparser = Multiparser()
    self.helper = Helper()
    # Optional inputs only get a "tmp" sub-folder path when supplied.
    self.tss_path = (None if args_sorf.tsss is None
                     else os.path.join(args_sorf.tsss, "tmp"))
    self.srna_path = (None if args_sorf.srnas is None
                      else os.path.join(args_sorf.srnas, "tmp"))
    out_folder = args_sorf.out_folder
    self.gff_output = os.path.join(out_folder, "gffs")
    self.table_output = os.path.join(out_folder, "tables")
    self.tran_path = os.path.join(args_sorf.trans, "tmp")
    self.fasta_path = os.path.join(args_sorf.fastas, "tmp")
    # Sub-folder names used for the two result sets.
    self.all_cand, self.best = "all_candidates", "best_candidates"
def _check_gff(self, gffs):
for gff in os.listdir(gffs):
if gff.endswith(".gff"):
self.helper.check_uni_attributes(os.path.join(gffs, gff))
def _check_necessary_files(self, args_sorf, log):
if (args_sorf.gffs is None) or (args_sorf.trans is None) or (
(args_sorf.tex_wigs is None) and (args_sorf.frag_wigs is None)):
print("Error: lack required files!")
log.write("genome annotation, transcript file or wiggle files "
"are not assigned.\n")
sys.exit()
if args_sorf.utr_detect:
if (args_sorf.tsss is None):
print("Error: TSS files are required for UTR derived"
" sORF detection!")
log.write("TSS files are required for UTR derived"
" sORF detection!\n")
sys.exit()
self._check_gff(args_sorf.gffs)
self.multiparser.parser_gff(args_sorf.gffs, None)
if args_sorf.tsss is not None:
self._check_gff(args_sorf.tsss)
self.multiparser.parser_gff(args_sorf.tsss, "TSS")
self.multiparser.combine_gff(args_sorf.gffs, self.tss_path,
None, "TSS")
self._check_gff(args_sorf.trans)
if args_sorf.srnas is not None:
self._check_gff(args_sorf.srnas)
self.multiparser.parser_gff(args_sorf.srnas, "sRNA")
self.multiparser.combine_gff(args_sorf.gffs, self.srna_path,
None, "sRNA")
def _start_stop_codon(self, prefixs, args_sorf, log):
'''detect the sORF based on start and stop codon
and ribosome binding site'''
log.write("Running sORF_detection.py for detecting sORFs.\n")
log.write("The following files are generated:\n")
for prefix in prefixs:
print("Searching sORFs of {0}".format(prefix))
if self.srna_path is not None:
srna_file = os.path.join(self.srna_path,
"_".join([prefix, "sRNA.gff"]))
else:
srna_file = None
if self.tss_path is not None:
tss_file = os.path.join(self.tss_path,
"_".join([prefix, "TSS.gff"]))
else:
tss_file = None
sorf_detection(os.path.join(self.fasta_path, prefix + ".fa"),
srna_file, os.path.join(args_sorf.out_folder,
"_".join([prefix, "inter.gff"])), tss_file,
os.path.join(args_sorf.wig_path,
"_".join([prefix, "forward.wig"])),
os.path.join(args_sorf.wig_path,
"_".join([prefix, "reverse.wig"])),
os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF"])), args_sorf)
if "_".join([prefix, "sORF_all.gff"]) in os.listdir(
os.path.join(self.gff_output, self.all_cand)):
gff_all = os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF.gff"]))
gff_best = os.path.join(self.gff_output, self.best,
"_".join([prefix, "sORF.gff"]))
csv_all = os.path.join(self.table_output, self.all_cand,
"_".join([prefix, "sORF.csv"]))
csv_best = os.path.join(self.table_output, self.best,
"_".join([prefix, "sORF.csv"]))
shutil.move(os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF_all.gff"])), gff_all)
shutil.move(os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF_best.gff"])), gff_best)
shutil.move(os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF_all.csv"])), csv_all)
shutil.move(os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF_best.csv"])), csv_best)
log.write("\t" + gff_all + "\n")
log.write("\t" + gff_best + "\n")
#.........这里部分代码省略.........
示例2: Terminator
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_gff [as 别名]
#.........这里部分代码省略.........
stdout=out)
def _run_TransTermHP(self, args_term):
self.helper.check_make_folder(self.tmps["transterm"])
for file_ in os.listdir(self.combine_path):
if ".ptt" in file_:
prefix = file_.replace(".ptt", "")
fasta = self.helper.get_correct_file(
self.fasta_path, ".fa", prefix, None, None)
if not fasta:
print("Error: no proper file - {0}.fa".format(prefix))
sys.exit()
out_path = os.path.join(args_term.hp_folder, prefix)
self.helper.check_make_folder(out_path)
out = open(os.path.join(out_path,
"_".join([prefix, "terminators.txt"])), "w")
self._TransTermHP(fasta, file_, out_path,
prefix, out, args_term)
out.close()
shutil.rmtree(self.combine_path)
def _convert_to_gff(self, prefixs, args_term):
for prefix in prefixs:
for folder in os.listdir(args_term.hp_folder):
if prefix == folder:
out_path = os.path.join(args_term.hp_folder, folder)
for file_ in os.listdir(out_path):
if file_.endswith(".bag"):
out_file = os.path.join(
self.tmps["transterm"],
"_".join([prefix, self.tmps["hp_gff"]]))
self.converter.convert_transtermhp2gff(
os.path.join(out_path, file_), out_file)
self.multiparser.combine_gff(args_term.gffs, self.tmps["transterm"],
None, self.tmps["hp"])
def _combine_wigs(self, args_term):
if (args_term.tex_wigs is not None) and (
args_term.frag_wigs is not None):
folder = args_term.tex_wigs.split("/")
folder = "/".join(folder[:-1])
merge_wigs = os.path.join(folder, "merge_wigs")
self.helper.check_make_folder(merge_wigs)
for wig in os.listdir(args_term.tex_wigs):
if os.path.isdir(os.path.join(args_term.tex_wigs, wig)):
pass
else:
shutil.copy(os.path.join(args_term.tex_wigs, wig),
merge_wigs)
for wig in os.listdir(args_term.frag_wigs):
if os.path.isdir(os.path.join(args_term.frag_wigs, wig)):
pass
else:
shutil.copy(os.path.join(args_term.frag_wigs, wig),
merge_wigs)
elif (args_term.tex_wigs is not None):
merge_wigs = args_term.tex_wigs
elif (args_term.frag_wigs is not None):
merge_wigs = args_term.frag_wigs
else:
print("Error: no proper wig files!!!")
sys.exit()
return merge_wigs
def _merge_sRNA(self, sRNAs, prefixs, gff_path):
if sRNAs is not None:
示例3: MEME
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_gff [as 别名]
#.........这里部分代码省略.........
for folder in os.listdir(os.path.join(output_folder,
strain, "GLAM2")):
csv_file = os.path.join(output_folder, strain,
"GLAM2", folder, "glam2.csv")
gen_promoter_table(os.path.join(output_folder, strain,
"GLAM2", folder, "glam2.txt"),
csv_file, tss_file, "glam2")
log.write("\t" + csv_file + "\n")
def _get_upstream(self, args_pro, prefix, tss, fasta):
'''get upstream sequence of TSS'''
if args_pro.source:
print("Generating fasta file of {0}".format(prefix))
upstream(os.path.join(self.tss_path, tss),
os.path.join(args_pro.fastas, fasta),
None, None, args_pro, prefix)
else:
if (args_pro.gffs is None):
print("Error: Please assign proper annotation!!!")
sys.exit()
if "TSS_classes" not in os.listdir(args_pro.output_folder):
os.mkdir(os.path.join(args_pro.output_folder, "TSS_classes"))
print("Classifying TSSs and extracting sequence of {0}".format(prefix))
upstream(os.path.join(self.tss_path, tss),
os.path.join(args_pro.fastas, fasta),
os.path.join(self.gff_path, prefix + ".gff"),
os.path.join(args_pro.output_folder, "TSS_classes",
"_".join([prefix, "TSS.gff"])), args_pro, prefix)
def _get_used_tss_type(self, args_pro):
input_fastas = []
for tss in args_pro.use_tss:
if int(tss) == 1:
input_fastas.append("all_types")
elif int(tss) == 2:
input_fastas.append("primary")
elif int(tss) == 3:
input_fastas.append("secondary")
elif int(tss) == 4:
input_fastas.append("internal")
elif int(tss) == 5:
input_fastas.append("antisense")
elif int(tss) == 6:
input_fastas.append("orphan")
elif int(tss) == 7:
input_fastas.append("without_orphan")
else:
print("Error: The assignment of --use_tss_typ is wrong!")
sys.exit()
return input_fastas
def run_meme(self, args_pro, log):
'''Entry point: split the inputs, extract the sequences upstream of the
TSSs for every TSS file, run the downstream steps and clean up.

args_pro -- option container; fastas, tsss, gffs, output_folder,
            combine and program are read from it here.
log      -- writable object; progress messages and the paths of the
            generated files are written to it.

NOTE(review): the leading indentation of this listing is missing, so
the nesting of the statements below cannot be read off the lines;
restore it from the original module before running the code.'''
# Drop "allfasta" entries found in the input folders (presumably
# leftovers of an earlier combined run -- confirm).
if "allfasta.fa" in os.listdir(args_pro.fastas):
os.remove(self.all_fasta)
if "allfasta.fa_folder" in os.listdir(args_pro.fastas):
shutil.rmtree(os.path.join(args_pro.fastas,
"allfasta.fa_folder"))
# Split the fasta and TSS inputs with the multiparser.
self.multiparser.parser_fasta(args_pro.fastas)
self.multiparser.parser_gff(args_pro.tsss, "TSS")
if "allfasta_TSS.gff" in os.listdir(self.tss_path):
os.remove(self.all_tss)
# The genome annotation is optional; it is only checked, parsed and
# combined when it was assigned.
if args_pro.gffs is not None:
self._check_gff(args_pro.gffs)
self.multiparser.parser_gff(args_pro.gffs, None)
self.multiparser.combine_gff(args_pro.fastas, self.gff_path,
"fasta", None)
self._check_gff(args_pro.tsss)
self.multiparser.combine_gff(args_pro.fastas, self.tss_path,
"fasta", "TSS")
self.helper.check_make_folder(self.out_fasta)
self.helper.check_make_folder(self.tmp_folder)
# prefixs collects one prefix per entry of self.tss_path; it is passed
# on to the combine / run / table steps below.
prefixs = []
log.write("Running .TSS_upstream.py to extract the upstream "
"sequences of TSSs.\n")
log.write("The following files are generated:\n")
for tss in os.listdir(self.tss_path):
# The prefix is the TSS file name without its "_TSS.gff" part.
prefix = tss.replace("_TSS.gff", "")
prefixs.append(prefix)
self.helper.check_make_folder(os.path.join(args_pro.output_folder,
prefix))
self.helper.check_make_folder(os.path.join(self.out_fasta,
prefix))
input_path = os.path.join(self.out_fasta, prefix)
fasta = self._get_fasta_file(args_pro.fastas, prefix)
self._get_upstream(args_pro, prefix, tss, fasta)
self._move_and_merge_fasta(input_path, prefix)
self._split_fasta_by_strain(input_path)
# Log every file now present in the per-prefix fasta folder.
for file_ in os.listdir(input_path):
log.write("\t" + os.path.join(input_path, file_) + "\n")
# Optional combined data set, logged from <out_fasta>/allfasta.
if args_pro.combine:
self._combine_file(prefixs, args_pro)
for file_ in os.listdir(os.path.join(self.out_fasta, "allfasta")):
log.write("\t" + os.path.join(
self.out_fasta, "allfasta", file_) + "\n")
input_fastas = self._get_used_tss_type(args_pro)
self._run_program(prefixs, args_pro, log, input_fastas)
print("Generating the tables")
self._gen_table(args_pro.output_folder, prefixs,
args_pro.combine, args_pro.program, log)
self._remove_files(args_pro)
示例4: TranscriptDetection
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_gff [as 别名]
class TranscriptDetection(object):
'''doing for transcript detection'''
def __init__(self, args_tran):
    '''Create the helper objects and derive all paths and file names
    used by transcript detection from args_tran.out_folder.'''
    self.multiparser = Multiparser()
    self.helper = Helper()
    self.converter = Converter()
    out_folder = args_tran.out_folder
    self.gff_outfolder = os.path.join(out_folder, "gffs")
    self.tran_path = os.path.join(self.gff_outfolder, "tmp")
    self.stat_path = os.path.join(out_folder, "statistics")
    # Temporary names: the first three are fixed, the rest are
    # "tmp_<key>" entries inside the gff output folder.
    self.tmps = {"gff": "tmp.gff", "merge": "tmp_merge",
                 "tran": os.path.join(out_folder, "tmp_tran")}
    for key in ("tss_ta", "ta_tss", "ta_gff", "gff_ta", "uni", "overlap"):
        self.tmps[key] = os.path.join(self.gff_outfolder, "tmp_" + key)
    self.frag = "transcript_fragment.gff"
    self.tex = "transcript_tex_notex.gff"
    self.endfix_tran = "transcript.gff"
def _compute_transcript(self, wig_f, wig_r, wig_folder, wig_type, strain,
                        libs, args_tran):
    '''Announce the strain and call detect_transcript() for its forward
    and reverse wiggle files, writing to <out_folder>/<strain>_<wig_type>.'''
    print("Computing transcripts for {0}".format(strain))
    out_name = "_".join([strain, wig_type])
    detect_transcript(wig_f, wig_r, wig_folder, libs,
                      os.path.join(args_tran.out_folder, out_name),
                      wig_type, args_tran)
def _compute(self, wig_type, wigs, libs, args_tran):
strains = []
wig_folder = os.path.join(wigs, "tmp")
for wig in os.listdir(wig_folder):
if wig.endswith("_forward.wig"):
strains.append(wig.replace("_forward.wig", ""))
for strain in strains:
f_file = os.path.join(wig_folder, "_".join(
[strain, "forward.wig"]))
r_file = os.path.join(wig_folder, "_".join(
[strain, "reverse.wig"]))
self._compute_transcript(f_file, r_file, wigs, wig_type,
strain, libs, args_tran)
return strains
def _compare_tss(self, tas, args_tran, log):
'''Compare the transcript file of every entry in tas with its TSS file.

tas       -- iterable of names; each is used to build the transcript
             file name and the statistics file name.
args_tran -- option container; compare_tss (TSS folder) and fuzzy are
             read from it here.
log       -- writable object that receives the progress messages.

NOTE(review): the leading indentation of this listing is missing, so
the nesting of the statements below (in particular the level of the
final log.write) must be restored from the original module.'''
# Split the TSS input and combine it, using the transcript gff folder
# as reference.
self.multiparser.parser_gff(args_tran.compare_tss, "TSS")
self.multiparser.combine_gff(
self.gff_outfolder,
os.path.join(args_tran.compare_tss, "tmp"),
"transcript", "TSS")
print("Comaring of transcripts and TSSs")
log.write("Running stat_TA_comparison.py to compare transcripts "
"with TSSs.\n")
tss_folder = os.path.join(args_tran.compare_tss, "tmp")
for ta in tas:
ta_file = os.path.join(self.gff_outfolder,
"_".join([ta, self.endfix_tran]))
stat_tss_out = os.path.join(
self.stat_path, "".join([
"stat_compare_transcript_TSS_",
ta, ".csv"]))
for tss in os.listdir(tss_folder):
# A TSS file belongs to ta when the text before "_TSS" equals ta.
filename = tss.split("_TSS")
if (filename[0] == ta) and (tss.endswith(".gff")):
stat_ta_tss(ta_file, os.path.join(tss_folder, tss),
stat_tss_out, self.tmps["ta_tss"],
self.tmps["tss_ta"], args_tran.fuzzy)
# The two input files are removed and then re-created by sorting the
# temporary files; note that the sorted TSS file is written to
# compare_tss itself, not to its "tmp" sub-folder.
os.remove(ta_file)
os.remove(os.path.join(tss_folder, tss))
self.helper.sort_gff(self.tmps["ta_tss"], ta_file)
self.helper.sort_gff(
self.tmps["tss_ta"], os.path.join(
args_tran.compare_tss, tss))
os.remove(self.tmps["tss_ta"])
os.remove(self.tmps["ta_tss"])
log.write("\t" + stat_tss_out + "\n")
def _compare_cds(self, tas, args_tran, log):
self.multiparser.parser_gff(args_tran.gffs, None)
self.multiparser.combine_gff(
self.gff_outfolder, os.path.join(args_tran.gffs, "tmp"),
"transcript", None)
print("Comaring of transcripts and genome annotations")
cds_folder = os.path.join(args_tran.gffs, "tmp")
log.write("Running stat_TA_comparison.py to compare transcripts "
"with genome annotations.\n")
for ta in tas:
ta_file = os.path.join(self.gff_outfolder,
"_".join([ta, self.endfix_tran]))
stat_gff_out = os.path.join(self.stat_path, "".join([
"stat_compare_transcript_genome_", ta, ".csv"]))
for gff in os.listdir(cds_folder):
if (gff[:-4] == ta) and (gff.endswith(".gff")):
cds_file = os.path.join(cds_folder, gff)
stat_ta_gff(ta_file, cds_file, stat_gff_out,
self.tmps["ta_gff"], self.tmps["gff_ta"],
args_tran.c_feature)
os.remove(ta_file)
os.remove(os.path.join(args_tran.gffs, gff))
self.helper.sort_gff(self.tmps["ta_gff"], ta_file)
#.........这里部分代码省略.........
示例5: sRNADetection
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_gff [as 别名]
#.........这里部分代码省略.........
args_srna.out_folder, "_".join(["tmp_utr_frag", prefix]))
files["frag_csv"] = os.path.join(
args_srna.out_folder, "_".join(["tmp_utr_frag_table", prefix]))
args_srna = self.args_container.container_utrsrna(
os.path.join(args_srna.gffs, gff), tran, tss, files,
pro, os.path.join(self.fasta_path, prefix + ".fa"),
"frag", prefix, args_srna)
utr_derived_srna(args_srna)
files["merge_csv"] = "_".join([self.prefixs["utr_table"], prefix])
files["merge_gff"] = "_".join([self.prefixs["utr"], prefix])
self._merge_frag_tex_file(files, args_srna)
filter_utr(files["merge_gff"], files["merge_csv"], args_srna.min_utr)
def _check_necessary_file(self, args_srna):
if (args_srna.gffs is None) or (args_srna.trans is None) or (
(args_srna.tex_wigs is None) and (
args_srna.frag_wigs is None)):
print("Error: lack required files!!!!")
sys.exit()
if args_srna.utr_srna:
if (args_srna.tss_folder is None):
print("Error: lack required TSS files for UTR "
"derived sRNA detection!!!!")
sys.exit()
if (args_srna.pro_folder is None):
print("Warning: lack Processing site files for UTR "
"derived sRNA detection!!!")
print("it may effect the results!!!!")
self._check_gff(args_srna.gffs)
self._check_gff(args_srna.trans)
if args_srna.tss_folder is not None:
self._check_gff(args_srna.tss_folder)
self.multiparser.parser_gff(args_srna.tss_folder, "TSS")
self.multiparser.combine_gff(args_srna.gffs, self.tss_path,
None, "TSS")
if args_srna.pro_folder is not None:
self._check_gff(args_srna.pro_folder)
self.multiparser.parser_gff(args_srna.pro_folder, "processing")
self.multiparser.combine_gff(args_srna.gffs, self.pro_path,
None, "processing")
if args_srna.sorf_file is not None:
self._check_gff(args_srna.sorf_file)
self.multiparser.parser_gff(args_srna.sorf_file, "sORF")
self.multiparser.combine_gff(args_srna.gffs, self.sorf_path,
None, "sORF")
if args_srna.utr_srna or ("sec_str" in args_srna.import_info) or (
"blast_nr" in args_srna.import_info) or (
"blast_srna" in args_srna.import_info):
if args_srna.fastas is None:
print("Error: lack required fasta files for UTR "
"derived sRNA detection!!!!")
sys.exit()
self.multiparser.parser_fasta(args_srna.fastas)
self.multiparser.combine_fasta(args_srna.gffs,
self.fasta_path, None)
if args_srna.terms is not None:
self._check_gff(args_srna.terms)
self.multiparser.parser_gff(args_srna.terms, "term")
self.multiparser.combine_gff(args_srna.gffs, self.term_path,
None, "term")
else:
self.term_path = None
def _run_program(self, args_srna):
prefixs = []
tss = None
示例6: OperonDetection
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_gff [as 别名]
class OperonDetection(object):
    '''detection of operon'''

    def __init__(self, args_op):
        '''Create the helpers and derive the working paths from args_op.

        The TSS and terminator folders are optional; their path
        attributes are None when the folder is not given.
        '''
        self.multiparser = Multiparser()
        self.helper = Helper()
        if args_op.tsss is not None:
            self.tss_path = os.path.join(args_op.tsss, "tmp")
        else:
            self.tss_path = None
        self.tran_path = os.path.join(args_op.trans, "tmp")
        self.table_path = os.path.join(args_op.output_folder, "tables")
        if args_op.terms is not None:
            self._check_gff(args_op.terms, "term")
            self.term_path = os.path.join(args_op.terms, "tmp")
        else:
            self.term_path = None

    def _check_gff(self, gffs, type_):
        '''Pass every ``.gff`` file in folder gffs to
        helper.check_uni_attributes (type_ is accepted but not used).'''
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _detect_operon(self, prefixs, args_op, log):
        '''Call operon() once per prefix and log the two output paths.

        TSS and terminator inputs are passed as False when their folder
        was not given.
        '''
        log.write("Running detect_operon.py to detect operon.\n")
        log.write("The the following files are generated:\n")
        for prefix in prefixs:
            out_gff = os.path.join(args_op.output_folder, "gffs",
                                   "_".join([prefix, "operon.gff"]))
            out_table = os.path.join(self.table_path,
                                     "_".join([prefix, "operon.csv"]))
            print("Detecting operons of {0}".format(prefix))
            if self.tss_path is None:
                tss = False
            else:
                tss = self.helper.get_correct_file(
                    self.tss_path, "_TSS.gff", prefix, None, None)
            tran = self.helper.get_correct_file(
                self.tran_path, "_transcript.gff", prefix, None, None)
            gff = self.helper.get_correct_file(
                args_op.gffs, ".gff", prefix, None, None)
            if self.term_path is None:
                term = False
            else:
                term = self.helper.get_correct_file(
                    self.term_path, "_term.gff", prefix, None, None)
            operon(tran, tss, gff, term, args_op.tss_fuzzy,
                   args_op.term_fuzzy, args_op.length, out_table, out_gff)
            log.write("\t" + out_table + "\n")
            log.write("\t" + out_gff + "\n")

    def _check_and_parser_gff(self, args_op):
        '''Check the annotation and transcript folders, then parse and
        combine the transcript, TSS (optional) and terminator (optional)
        inputs with the multiparser.'''
        self._check_gff(args_op.gffs, "gff")
        self._check_gff(args_op.trans, "tran")
        self.multiparser.parser_gff(args_op.gffs, None)
        self.multiparser.parser_gff(args_op.trans, "transcript")
        self.multiparser.combine_gff(args_op.gffs, self.tran_path,
                                     None, "transcript")
        if args_op.tsss is not None:
            self._check_gff(args_op.tsss, "tss")
            self.multiparser.parser_gff(args_op.tsss, "TSS")
            self.multiparser.combine_gff(
                args_op.gffs, self.tss_path, None, "TSS")
        if args_op.terms is not None:
            self._check_gff(args_op.terms, "term")
            self.multiparser.parser_gff(args_op.terms, "term")
            self.multiparser.combine_gff(args_op.gffs, self.term_path,
                                         None, "term")

    def _stat(self, table_path, stat_folder, log):
        '''Call stat() for every "*_operon.csv" table in table_path and
        log the path of each "stat_<table>" output.'''
        log.write("Running stat_operon.py to do statistics.\n")
        for table in os.listdir(table_path):
            if table.endswith("_operon.csv"):
                filename = "_".join(["stat", table])
                out_stat = os.path.join(stat_folder, filename)
                stat(os.path.join(table_path, table), out_stat)
                log.write("\t" + out_stat + "\n")

    def run_operon(self, args_op, log):
        '''Entry point: prepare the inputs, detect operons for every
        ``.gff`` file in args_op.gffs, compute statistics and remove the
        temporary folders of the inputs that were given.'''
        self._check_and_parser_gff(args_op)
        prefixs = [gff.replace(".gff", "")
                   for gff in os.listdir(args_op.gffs)
                   if gff.endswith(".gff")]
        self._detect_operon(prefixs, args_op, log)
        self._stat(self.table_path, args_op.stat_folder, log)
        self.helper.remove_tmp_dir(args_op.gffs)
        # tsss is optional everywhere else in this class, so it is
        # guarded here in the same way as terms.
        if args_op.tsss is not None:
            self.helper.remove_tmp_dir(args_op.tsss)
        self.helper.remove_tmp_dir(args_op.trans)
        if args_op.terms is not None:
            self.helper.remove_tmp_dir(args_op.terms)
示例7: OperonDetection
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_gff [as 别名]
class OperonDetection(object):
def __init__(self, args_op):
    '''Create the helpers and derive the "tmp" working paths of every
    input folder; the terminator folder is optional and is checked
    with _check_gff when given.'''
    self.multiparser = Multiparser()
    self.helper = Helper()
    self.tss_path = os.path.join(args_op.tsss, "tmp")
    self.tran_path = os.path.join(args_op.trans, "tmp")
    self.utr5_path = os.path.join(args_op.utr5s, "tmp")
    self.utr3_path = os.path.join(args_op.utr3s, "tmp")
    self.table_path = os.path.join(args_op.output_folder, "tables")
    if args_op.terms is None:
        self.term_path = None
    else:
        self._check_gff(args_op.terms, "term")
        self.term_path = os.path.join(args_op.terms, "tmp")
def _check_gff(self, gffs, type_):
for gff in os.listdir(gffs):
if gff.endswith(".gff"):
self.helper.check_uni_attributes(os.path.join(gffs, gff))
def _detect_operon(self, prefixs, args_op):
for prefix in prefixs:
out_table = os.path.join(self.table_path,
"_".join(["operon", prefix + ".csv"]))
print("Detection operons of {0}".format(prefix))
tss = self.helper.get_correct_file(
self.tss_path, "_TSS.gff", prefix, None, None)
tran = self.helper.get_correct_file(
self.tran_path, "_transcript.gff", prefix, None, None)
gff = self.helper.get_correct_file(
args_op.gffs, ".gff", prefix, None, None)
if self.term_path is None:
term = False
else:
term = self.helper.get_correct_file(
self.term_path, "_term.gff", prefix, None, None)
operon(tran, tss, gff, term, args_op.tss_fuzzy,
args_op.term_fuzzy, args_op.length, out_table)
def _check_and_parser_gff(self, args_op):
self._check_gff(args_op.tsss, "tss")
self._check_gff(args_op.gffs, "gff")
self._check_gff(args_op.trans, "tran")
self._check_gff(args_op.utr5s, "utr")
self._check_gff(args_op.utr3s, "utr")
self.multiparser.parser_gff(args_op.gffs, None)
self.multiparser.parser_gff(args_op.tsss, "TSS")
self.multiparser.combine_gff(args_op.gffs, self.tss_path, None, "TSS")
self.multiparser.parser_gff(args_op.trans, "transcript")
self.multiparser.combine_gff(args_op.gffs, self.tran_path,
None, "transcript")
self.multiparser.parser_gff(args_op.utr5s, "5UTR")
self.multiparser.combine_gff(args_op.gffs, self.utr5_path,
None, "5UTR")
self.multiparser.parser_gff(args_op.utr3s, "3UTR")
self.multiparser.combine_gff(args_op.gffs, self.utr3_path,
None, "3UTR")
if args_op.terms is not None:
self._check_gff(args_op.terms, "term")
self.multiparser.parser_gff(args_op.terms, "term")
self.multiparser.combine_gff(args_op.gffs, self.term_path,
None, "term")
def _stat(self, table_path, stat_folder):
for table in os.listdir(table_path):
if table.startswith("operon_") and table.endswith(".csv"):
filename = "_".join(["stat", table])
out_stat = os.path.join(stat_folder, filename)
stat(os.path.join(table_path, table), out_stat)
def _combine_gff(self, prefixs, args_op):
for prefix in prefixs:
out_file = os.path.join(args_op.output_folder, "gffs",
"_".join([prefix, "all_features.gff"]))
print("Combine all features of {0}".format(prefix))
tss = self.helper.get_correct_file(
self.tss_path, "_TSS.gff", prefix, None, None)
tran = self.helper.get_correct_file(
self.tran_path, "_transcript.gff", prefix, None, None)
gff = self.helper.get_correct_file(
args_op.gffs, ".gff", prefix, None, None)
utr5 = self.helper.get_correct_file(
self.utr5_path, "_5UTR.gff", prefix, None, None)
utr3 = self.helper.get_correct_file(
self.utr3_path, "_3UTR.gff", prefix, None, None)
if self.term_path is None:
term = None
else:
term = self.helper.get_correct_file(
self.term_path, "_term.gff", prefix, None, None)
combine_gff(gff, tran, tss, utr5, utr3, term,
args_op.tss_fuzzy, args_op.term_fuzzy, out_file)
def run_operon(self, args_op):
self._check_and_parser_gff(args_op)
prefixs = []
for gff in os.listdir(args_op.gffs):
if gff.endswith(".gff"):
prefixs.append(gff.replace(".gff", ""))
#.........这里部分代码省略.........
示例8: TestMultiparser
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_gff [as 别名]
class TestMultiparser(unittest.TestCase):
def setUp(self):
    '''Create the object under test, the example data and the two
    working folders (created only when they do not exist yet).'''
    self.multiparser = Multiparser()
    self.example = Example()
    for attr, folder in (("ref_folder", "ref_folder"),
                         ("tar_folder", "tar_folder")):
        setattr(self, attr, folder)
        if not os.path.exists(folder):
            os.mkdir(folder)
def tearDown(self):
    '''Remove the reference and target folders if they still exist.'''
    for folder in (self.ref_folder, self.tar_folder):
        if os.path.exists(folder):
            shutil.rmtree(folder)
def test_combine_fasta(self):
    '''After combine_fasta, "test.fa" must exist in the target folder.'''
    tmp_tar = os.path.join(self.tar_folder, "tmp")
    tmp_ref = os.path.join(self.ref_folder, "test.gff_folder")
    os.mkdir(tmp_ref)
    os.mkdir(tmp_tar)
    # (folder, file name, attribute of the example data to write)
    fixtures = ((tmp_tar, "aaa.fa", "sub_fasta1"),
                (tmp_tar, "bbb.fa", "sub_fasta2"),
                (tmp_ref, "aaa.gff", "sub_gff1"),
                (tmp_ref, "bbb.gff", "sub_gff2"))
    for folder, name, attr in fixtures:
        with open(os.path.join(folder, name), "w") as handle:
            handle.write(getattr(self.example, attr))
    self.multiparser.combine_fasta(self.ref_folder, tmp_tar, None)
    self.assertTrue(os.path.exists(os.path.join(tmp_tar, "test.fa")))
def test_combine_wig(self):
    '''After combine_wig, "test_forward.wig" and "test_reverse.wig"
    must exist in the target folder.'''
    tmp_tar = os.path.join(self.tar_folder, "tmp")
    tmp_ref = os.path.join(self.ref_folder, "test.fa_folder")
    os.mkdir(tmp_ref)
    os.mkdir(tmp_tar)
    for name, attr in (("aaa.fa", "sub_fasta1"), ("bbb.fa", "sub_fasta2")):
        with open(os.path.join(tmp_ref, name), "w") as handle:
            handle.write(getattr(self.example, attr))
    wig_names = ("test_forward.wig_STRAIN_aaa.wig",
                 "test_forward.wig_STRAIN_bbb.wig",
                 "test_reverse.wig_STRAIN_aaa.wig",
                 "test_reverse.wig_STRAIN_bbb.wig")
    wig_files = [os.path.join(tmp_tar, name) for name in wig_names]
    example_wigs = [self.example.sub_f_wig1, self.example.sub_f_wig2,
                    self.example.sub_r_wig1, self.example.sub_r_wig2]
    for wig_file, content in zip(wig_files, example_wigs):
        with open(wig_file, "w") as fh:
            fh.write(content)
    libs = ["test_forward.wig_STRAIN_aaa.wig:frag:1:a:+",
            "test_reverse.wig_STRAIN_aaa.wig:frag:1:a:-"]
    self.multiparser.combine_wig(self.ref_folder, tmp_tar, "fasta", libs)
    for merged in ("test_forward.wig", "test_reverse.wig"):
        self.assertTrue(os.path.exists(os.path.join(tmp_tar, merged)))
def test_combine_gff(self):
    '''After combine_gff, "test.gff" must exist in the target folder.'''
    tmp_tar = os.path.join(self.tar_folder, "tmp")
    tmp_ref = os.path.join(self.ref_folder, "test.fa_folder")
    os.mkdir(tmp_ref)
    os.mkdir(tmp_tar)
    # (folder, file name, attribute of the example data to write)
    fixtures = ((tmp_ref, "aaa.fa", "sub_fasta1"),
                (tmp_ref, "bbb.fa", "sub_fasta2"),
                (tmp_tar, "aaa.gff", "sub_gff1"),
                (tmp_tar, "bbb.gff", "sub_gff2"))
    for folder, name, attr in fixtures:
        with open(os.path.join(folder, name), "w") as handle:
            handle.write(getattr(self.example, attr))
    self.multiparser.combine_gff(self.ref_folder, tmp_tar, "fasta", None)
    self.assertTrue(os.path.exists(os.path.join(tmp_tar, "test.gff")))
def test_parser_fasta(self):
    '''After parser_fasta, the per-sequence files must exist both in
    "tmp" and in "test.fa_folder" under the reference folder.'''
    with open(os.path.join(self.ref_folder, "test.fa"), "w") as handle:
        handle.write(self.example.fasta_file)
    self.multiparser.parser_fasta(self.ref_folder)
    expected = ("tmp/aaa.fa", "tmp/bbb.fa",
                "test.fa_folder/aaa.fa", "test.fa_folder/bbb.fa")
    for relative in expected:
        self.assertTrue(
            os.path.exists(os.path.join(self.ref_folder, relative)))
def test_parser_gff(self):
gff_file = os.path.join(self.ref_folder, "test.gff")
with open(gff_file, "w") as rh:
rh.write(self.example.gff_file)
self.multiparser.parser_gff(self.ref_folder, None)
self.assertTrue(os.path.exists(os.path.join(self.ref_folder, "tmp/aaa.gff")))
#.........这里部分代码省略.........
示例9: Terminator
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_gff [as 别名]
#.........这里部分代码省略.........
if not fasta:
log.write("{0}.fa can not be found!.\n".format(prefix))
print("Error: {0}.fa can not be found!".format(prefix))
sys.exit()
out_path = os.path.join(args_term.hp_folder, prefix)
self.helper.check_make_folder(out_path)
out = open(os.path.join(out_path,
"_".join([prefix, "terminators.txt"])), "w")
self._TransTermHP(fasta, file_, out_path,
prefix, out, args_term, log)
log.write("Done!\n")
log.write("The following files are generated in {0}.\n".format(
out_path))
for file_ in os.listdir(out_path):
log.write("\t" + file_ + "\n")
out.close()
shutil.rmtree(self.combine_path)
def _convert_to_gff(self, prefixs, args_term, log):
log.write("Running coverter.py to convert the results of TransTermHP "
"to gff3 format.\n")
for prefix in prefixs:
for folder in os.listdir(args_term.hp_folder):
if prefix == folder:
out_path = os.path.join(args_term.hp_folder, folder)
for file_ in os.listdir(out_path):
if file_.endswith(".bag"):
out_file = os.path.join(
self.tmps["transterm"],
"_".join([prefix, self.tmps["hp_gff"]]))
self.converter.convert_transtermhp2gff(
os.path.join(out_path, file_), out_file)
log.write("\t" + out_file + " is generated.\n")
self.multiparser.combine_gff(args_term.gffs, self.tmps["transterm"],
None, self.tmps["hp"])
def _combine_wigs(self, args_term):
if (args_term.tex_wigs is not None) and (
args_term.frag_wigs is not None):
folder = args_term.tex_wigs.split("/")
folder = "/".join(folder[:-1])
merge_wigs = os.path.join(folder, "merge_wigs")
self.helper.check_make_folder(merge_wigs)
for wig in os.listdir(args_term.tex_wigs):
if os.path.isdir(os.path.join(args_term.tex_wigs, wig)):
pass
else:
shutil.copy(os.path.join(args_term.tex_wigs, wig),
merge_wigs)
for wig in os.listdir(args_term.frag_wigs):
if os.path.isdir(os.path.join(args_term.frag_wigs, wig)):
pass
else:
shutil.copy(os.path.join(args_term.frag_wigs, wig),
merge_wigs)
elif (args_term.tex_wigs is not None):
merge_wigs = args_term.tex_wigs
elif (args_term.frag_wigs is not None):
merge_wigs = args_term.frag_wigs
else:
print("Error: Wiggle files are not assigned!")
sys.exit()
return merge_wigs
def _merge_sRNA(self, sRNAs, prefixs, gff_path):
'''searching the terminator with sRNA information'''
示例10: CircRNADetection
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_gff [as 别名]
#.........这里部分代码省略.........
for prefix in tmp_prefixs:
self.helper.check_make_folder(os.path.join(self.gff_folder,
prefix))
shutil.copytree(prefix, os.path.join(self.splice_path, prefix))
self.helper.check_make_folder(os.path.join(
self.candidate_path, prefix))
print("comparing with annotation of {0}".format(prefix))
if self.splices["all_file"] in os.listdir(os.path.join(
self.splice_path, prefix)):
detect_circrna(os.path.join(self.splice_path, prefix,
self.splices["all_file"]), os.path.join(
self.gff_path, prefix + ".gff"),
os.path.join(self.candidate_path, prefix,
"_".join(["circRNA", prefix + "_all.csv"])),
args_circ, os.path.join(args_circ.stat_folder,
"_".join(["stat_circRNA", prefix + ".csv"])))
self.converter.convert_circ2gff(
os.path.join(self.candidate_path, prefix,
"_".join(["circRNA",
prefix + "_all.csv"])),
args_circ, os.path.join(
self.gff_folder, prefix,
"_".join([prefix, "circRNA_all.gff"])),
os.path.join(self.gff_folder, prefix,
"_".join([prefix, "circRNA_best.gff"])))
def _assign_merge_bam(self, args_circ):
remove_frags = []
bam_files = []
if (args_circ.normal_bams is not None) and (
args_circ.frag_bams is not None):
for frag in os.listdir(args_circ.frag_bams):
if frag.endswith(".bam"):
shutil.copyfile(os.path.join(args_circ.frag_bams, frag),
os.path.join(args_circ.normal_bams, frag))
remove_frags.append(frag)
merge_folder = args_circ.normal_bams
elif (args_circ.normal_bams is not None):
merge_folder = args_circ.normal_bams
elif (args_circ.frag_bams is not None):
merge_folder = args_circ.frag_bams
else:
print("Error: please assign bam folder or do alignment!!")
sys.exit()
for bam in os.listdir(merge_folder):
if bam.endswith(".bam"):
bam_files.append(os.path.join(merge_folder, bam))
return merge_folder, remove_frags, bam_files
def run_circrna(self, args_circ):
    '''Run the complete circRNA detection.

    The steps are: check the attributes of the gff files in
    args_circ.gffs, parse the gff and fasta files, either align the
    reads (args_circ.align is true) or take the assigned BAM folders,
    merge/sort the alignment files and call self._run_testrealign for
    every prefix, merge the bed files, generate the statistics and gff
    files, and finally remove the temporary files and folders.

    The program is terminated by sys.exit() if args_circ.segemehl_path
    is not assigned.
    '''
    for gff in os.listdir(args_circ.gffs):
        if gff.endswith(".gff"):
            self.helper.check_uni_attributes(os.path.join(
                args_circ.gffs, gff))
    if args_circ.segemehl_path is None:
        print("Error: please assign segemehl folder!!")
        sys.exit()
    self.multiparser.parser_gff(args_circ.gffs, None)
    self.multiparser.combine_gff(args_circ.fastas, self.gff_path,
                                 "fasta", None)
    # The fasta files are parsed first in both modes.
    self.multiparser.parser_fasta(args_circ.fastas)
    tmp_reads = []
    # Only assigned when the alignment step is skipped.
    merge_folder = None
    remove_frag = []
    if args_circ.align:
        tmp_reads = self._deal_zip_file(args_circ.read_folder)
        align_files, prefixs = self._align(args_circ)
    else:
        prefixs = [fasta.replace(".fa", "")
                   for fasta in os.listdir(self.fasta_path)]
        merge_folder, remove_frag, bam_files = self._assign_merge_bam(
            args_circ)
        align_files = None
    for prefix in prefixs:
        if args_circ.align:
            sub_alignment_path = os.path.join(self.alignment_path, prefix)
            bam_files, convert_ones, remove_ones = self._convert_sam2bam(
                sub_alignment_path, args_circ.samtools_path, align_files)
        else:
            sub_alignment_path = merge_folder
            convert_ones = []
            remove_ones = []
        self._merge_sort_aligment_file(
            bam_files, args_circ.samtools_path, sub_alignment_path,
            convert_ones, tmp_reads, remove_ones)
        self._run_testrealign(prefix, args_circ.segemehl_path,
                              sub_alignment_path)
    tmp_prefixs = self._merge_bed(args_circ.fastas, self.splice_path)
    # The gff files are parsed and combined a second time, exactly as in
    # the original workflow, before the statistics are generated.
    self.multiparser.parser_gff(args_circ.gffs, None)
    self.multiparser.combine_gff(args_circ.fastas, self.gff_path,
                                 "fasta", None)
    self._stat_and_gen_gff(tmp_prefixs, args_circ)
    self.helper.remove_tmp(args_circ.fastas)
    self.helper.remove_tmp(args_circ.gffs)
    for tmp_prefix in tmp_prefixs:
        shutil.rmtree(tmp_prefix)
    # Delete the fragmented BAM files which self._assign_merge_bam
    # copied into the merged folder.
    if (not args_circ.align) and (len(remove_frag) != 0):
        for frag in remove_frag:
            os.remove(os.path.join(merge_folder, frag))
示例11: MEME
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_gff [as 别名]
#.........这里部分代码省略.........
fasta.endswith(".fasta")):
self.helper.merge_file(os.path.join(
args_pro.fastas, fasta), self.all_fasta)
print("generating fasta file of all fasta files")
prefixs.append("allfasta")
input_path = os.path.join(self.out_fasta, "allfasta")
self.helper.check_make_folder(os.path.join(
args_pro.output_folder, "allfasta"))
self.helper.check_make_folder(os.path.join(
self.out_fasta, "allfasta"))
args_pro.source = True
upstream(self.all_tss, self.all_fasta, None,
None, args_pro)
self._move_and_merge_fasta(input_path, "allfasta")
def _remove_files(self, args_pro):
    '''Remove the temporary files and folders of the promoter detection.

    self.helper.remove_tmp is called for the fasta, TSS, gff and wig
    folders; the merged fasta file (self.all_fasta) is deleted if
    "allfasta.fa" is listed in args_pro.fastas; the "allfasta" and
    "tmp" folders of the current working directory are deleted.
    '''
    self.helper.remove_tmp(args_pro.fastas)
    self.helper.remove_tmp(args_pro.tsss)
    self.helper.remove_tmp(args_pro.gffs)
    self.helper.remove_tmp(args_pro.wigs)
    if "allfasta.fa" in os.listdir(args_pro.fastas):
        os.remove(self.all_fasta)
    if "allfasta" in os.listdir(os.getcwd()):
        shutil.rmtree("allfasta")
    # NOTE(review): the original indentation was lost; "tmp" is assumed
    # to be removed unconditionally - confirm against the caller.
    shutil.rmtree("tmp")
def _gen_table(self, output_folder, prefixs, combine):
if combine:
strains = prefixs + ["allfasta"]
else:
strains = prefixs
for strain in strains:
for folder in os.listdir(os.path.join(output_folder, strain)):
tss_file = os.path.join(self.tss_path, strain + "_TSS.gff")
gen_promoter_table(os.path.join(output_folder, strain,
folder, "meme.txt"),
os.path.join(output_folder, strain,
folder, "meme.csv"), tss_file)
def _get_upstream(self, args_pro, prefix, tss, fasta):
if args_pro.source:
print("generating fasta file of {0}".format(prefix))
upstream(os.path.join(self.tss_path, tss),
os.path.join(args_pro.fastas, fasta),
None, None, args_pro)
else:
if (args_pro.gffs is None) or (
args_pro.wigs is None) or (
args_pro.input_libs is None):
print("Error:please assign proper annotation, tex +/- "
"wig folder and tex treated libs!!!")
sys.exit()
if "TSS_class" not in os.listdir(args_pro.output_folder):
os.mkdir(os.path.join(args_pro.output_folder, "TSS_class"))
print("classifying TSS and extracting fasta {0}".format(prefix))
upstream(os.path.join(self.tss_path, tss),
os.path.join(args_pro.fastas, fasta),
os.path.join(self.gff_path, prefix + ".gff"),
os.path.join(args_pro.output_folder, "TSS_class",
"_".join([prefix, "TSS.gff"])), args_pro)
def run_meme(self, args_pro):
    '''Run the complete promoter detection.

    Stale merged files of a previous run are removed, the fasta, TSS and
    (if assigned) gff files are parsed and combined, the upstream
    sequences of every TSS file are generated and split by strain, the
    files are optionally combined (args_pro.combine), self._run_program
    is called, the tables are generated and the temporary files are
    removed.
    '''
    # NOTE(review): the original indentation was lost; the two clean-up
    # checks are treated as independent of each other, so a stale
    # "allfasta.fa_folder" is removed even if "allfasta.fa" is absent.
    if "allfasta.fa" in os.listdir(args_pro.fastas):
        os.remove(self.all_fasta)
    if "allfasta.fa_folder" in os.listdir(args_pro.fastas):
        shutil.rmtree(os.path.join(args_pro.fastas,
                                   "allfasta.fa_folder"))
    self.multiparser.parser_fasta(args_pro.fastas)
    self.multiparser.parser_gff(args_pro.tsss, "TSS")
    if "allfasta_TSS.gff" in os.listdir(self.tss_path):
        os.remove(self.all_tss)
    if args_pro.gffs is not None:
        self._check_gff(args_pro.gffs)
        self.multiparser.parser_gff(args_pro.gffs, None)
        self.multiparser.combine_gff(args_pro.fastas, self.gff_path,
                                     "fasta", None)
    self._check_gff(args_pro.tsss)
    self.multiparser.combine_gff(args_pro.fastas, self.tss_path,
                                 "fasta", "TSS")
    self.helper.check_make_folder(self.out_fasta)
    self.helper.check_make_folder(self.tmp_folder)
    prefixs = []
    for tss in os.listdir(self.tss_path):
        prefix = tss.replace("_TSS.gff", "")
        prefixs.append(prefix)
        self.helper.check_make_folder(os.path.join(args_pro.output_folder,
                                                   prefix))
        input_path = os.path.join(self.out_fasta, prefix)
        self.helper.check_make_folder(input_path)
        fasta = self._get_fasta_file(args_pro.fastas, prefix)
        self._get_upstream(args_pro, prefix, tss, fasta)
        self._move_and_merge_fasta(input_path, prefix)
        self._split_fasta_by_strain(input_path)
    if args_pro.combine:
        self._combine_file(prefixs, args_pro)
    self._run_program(prefixs, args_pro)
    print("generating the table...")
    self._gen_table(args_pro.output_folder, prefixs, args_pro.combine)
    self._remove_files(args_pro)
示例12: TSSpredator
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_gff [as 别名]
#.........这里部分代码省略.........
def _validate(self, tsss, args_tss, log):
'''validate TSS with genome annotation'''
print("Validating TSSs with genome annotations")
log.write("Running validate_gene.py to compare genome "
"annotations and TSSs/PSs.\n")
for tss in tsss:
for gff in os.listdir(args_tss.gffs):
if (gff[:-4] == tss) and (".gff" in gff):
break
stat_file = os.path.join(
self.stat_outfolder, tss,
"".join(["stat_gene_vali_", tss, ".csv"]))
out_cds_file = os.path.join(args_tss.out_folder, "tmp.gff")
if args_tss.program.lower() == "tss":
compare_file = os.path.join(self.gff_outfolder,
"_".join([tss, "TSS.gff"]))
elif args_tss.program.lower() == "processing":
compare_file = os.path.join(self.gff_outfolder,
"_".join([tss, "processing.gff"]))
validate_gff(compare_file, os.path.join(args_tss.gffs, gff),
stat_file, out_cds_file, args_tss.utr_length,
args_tss.program.lower())
log.write("\t" + stat_file + " is generated.\n")
shutil.move(out_cds_file, os.path.join(args_tss.gffs, gff))
def _compare_ta(self, tsss, args_tss, log):
    '''compare TSS with transcript

    The transcript files are parsed and combined first. Then, for every
    name in tsss which has a "NAME_transcript.gff" file in
    self.tmps["ta"], stat_ta_tss is called and the two temporary files
    self.tmps["tss_ta"] and self.tmps["ta_tss"] are sorted into the TSS
    file and the transcript file before they are deleted.
    '''
    log.write("Running stat_TA_comparison to compare transcripts "
              "and TSSs/PSs.\n")
    print("Comparing transcripts and TSSs")
    self.multiparser.parser_gff(args_tss.ta_files, "transcript")
    self.multiparser.combine_gff(args_tss.gffs, self.tmps["ta"],
                                 None, "transcript")
    for tss in tsss:
        # An exact file name test replaces split("_transcript"), which
        # raised IndexError for files without "_transcript" in the name.
        ta = "_".join([tss, "transcript.gff"])
        if ta not in os.listdir(self.tmps["ta"]):
            continue
        stat_out = os.path.join(
            self.stat_outfolder, tss, "".join([
                "stat_compare_TSS_transcript_",
                tss, ".csv"]))
        compare_file = os.path.join(self.gff_outfolder,
                                    "_".join([tss, "TSS.gff"]))
        stat_ta_tss(os.path.join(self.tmps["ta"], ta), compare_file,
                    stat_out, self.tmps["ta_tss"],
                    self.tmps["tss_ta"], args_tss.fuzzy)
        self.helper.sort_gff(self.tmps["tss_ta"], compare_file)
        self.helper.sort_gff(self.tmps["ta_tss"],
                             os.path.join(args_tss.ta_files, ta))
        os.remove(self.tmps["tss_ta"])
        os.remove(self.tmps["ta_tss"])
        # NOTE(review): the original indentation was lost; the log line
        # is written only if the comparison really ran.
        log.write("\t" + stat_out + " is generated.\n")
def _stat_tss(self, tsss, feature, log):
print("Running statistaics")
for tss in tsss:
compare_file = os.path.join(self.gff_outfolder,
"_".join([tss, feature]) + ".gff")
stat_tsspredator(
compare_file, feature,
示例13: UTRDetection
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_gff [as 别名]
class UTRDetection(object):
    '''detection of 5'UTR and 3'UTR'''

    def __init__(self, args_utr):
        '''Store the helper objects and the paths which are derived from
        args_utr (tsss, trans and out_folder).'''
        self.helper = Helper()
        self.multiparser = Multiparser()
        # A missing folder must not raise TypeError here, otherwise
        # run_utr_detection cannot print its error message.
        self.tss_path = self._tmp_folder(args_utr.tsss)
        self.tran_path = self._tmp_folder(args_utr.trans)
        self.utr5_path = os.path.join(args_utr.out_folder, "5UTR")
        self.utr3_path = os.path.join(args_utr.out_folder, "3UTR")
        self.utr5_stat_path = os.path.join(self.utr5_path, "statistics")
        self.utr3_stat_path = os.path.join(self.utr3_path, "statistics")

    @staticmethod
    def _tmp_folder(folder):
        '''Return the "tmp" sub folder of folder, or None if folder is
        None.'''
        if folder is None:
            return None
        return os.path.join(folder, "tmp")

    def _check_folder(self, folder):
        '''Terminate the program by sys.exit() if folder is None.'''
        if folder is None:
            print("Error: lack required files!!!")
            sys.exit()

    def _check_gff(self, folder):
        '''Call self.helper.check_uni_attributes for every "*.gff" file
        in folder.'''
        for gff in os.listdir(folder):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(folder, gff))

    def _compute_utr(self, args_utr):
        '''Call detect_5utr and detect_3utr for every "*.gff" file of
        args_utr.gffs and move the "_5utr_length.png" and
        "_3utr_length.png" content of the current working directory to
        the statistics folders.'''
        for gff in os.listdir(args_utr.gffs):
            if gff.endswith(".gff"):
                prefix = gff[:-4]
                tss = self.helper.get_correct_file(
                    self.tss_path, "_TSS.gff", prefix, None, None)
                tran = self.helper.get_correct_file(
                    self.tran_path, "_transcript.gff", prefix, None, None)
                if args_utr.terms:
                    term = self.helper.get_correct_file(
                        os.path.join(args_utr.terms, "tmp"),
                        "_term.gff", prefix, None, None)
                else:
                    term = None
                print("computing 5'UTR of {0} .....".format(prefix))
                detect_5utr(tss, os.path.join(args_utr.gffs, gff),
                            tran, os.path.join(
                                self.utr5_path, "gffs",
                                "_".join([prefix, "5UTR.gff"])), args_utr)
                print("computing 3'UTR of {0} .....".format(prefix))
                detect_3utr(tran, os.path.join(args_utr.gffs, gff),
                            term, os.path.join(
                                self.utr3_path, "gffs",
                                "_".join([prefix, "3UTR.gff"])), args_utr)
                # NOTE(review): the original indentation was lost; the
                # figures are assumed to be moved after every strain.
                self.helper.move_all_content(
                    os.getcwd(), self.utr5_stat_path, ["_5utr_length.png"])
                self.helper.move_all_content(
                    os.getcwd(), self.utr3_stat_path, ["_3utr_length.png"])

    def run_utr_detection(self, args_utr):
        '''Run the complete UTR detection.

        The TSS, annotation and transcript folders are required (the
        program is terminated by sys.exit() if one of them is None);
        the terminator folder (args_utr.terms) is optional.
        '''
        self._check_folder(args_utr.tsss)
        self._check_folder(args_utr.gffs)
        self._check_folder(args_utr.trans)
        self._check_gff(args_utr.tsss)
        self._check_gff(args_utr.gffs)
        self._check_gff(args_utr.trans)
        # The terminators are optional. os.listdir(None) lists the
        # current working directory, so the check has to be skipped
        # if no terminator folder is assigned.
        if args_utr.terms:
            self._check_gff(args_utr.terms)
        self.multiparser.parser_gff(args_utr.gffs, None)
        self.multiparser.parser_gff(args_utr.tsss, "TSS")
        self.multiparser.combine_gff(args_utr.gffs, self.tss_path,
                                     None, "TSS")
        self.multiparser.parser_gff(args_utr.trans, "transcript")
        self.multiparser.combine_gff(args_utr.gffs, self.tran_path,
                                     None, "transcript")
        if args_utr.terms:
            self.multiparser.parser_gff(args_utr.terms, "term")
            self.multiparser.combine_gff(args_utr.gffs,
                                         os.path.join(args_utr.terms, "tmp"),
                                         None, "term")
        self._compute_utr(args_utr)
        self.helper.remove_tmp(args_utr.gffs)
        self.helper.remove_tmp(args_utr.tsss)
        self.helper.remove_tmp(args_utr.trans)
        self.helper.remove_tmp(args_utr.terms)
        self.helper.remove_tmp(self.utr5_path)
        self.helper.remove_tmp(self.utr3_path)
示例14: TSSpredator
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_gff [as 别名]
#.........这里部分代码省略.........
os.path.join(self.tmps["tss"], filename),
os.path.join(args_tss.gffs, gff), args_tss)
shutil.move(stat_file, os.path.join(args_tss.out_folder,
"statistics", tss, stat_file))
self.helper.move_all_content(self.tmps["tss"],
self.gff_outfolder, [".gff"])
shutil.rmtree(self.tmps["tss"])
def _validate(self, tsss, args_tss):
print("Running validation of annotation....")
for tss in tsss:
for gff in os.listdir(args_tss.gffs):
if (gff[:-4] == tss) and (".gff" in gff):
break
stat_file = os.path.join(
self.stat_outfolder, tss,
"".join(["stat_gene_vali_", tss, ".csv"]))
out_cds_file = os.path.join(args_tss.out_folder, "tmp.gff")
if args_tss.program.lower() == "tss":
compare_file = os.path.join(self.gff_outfolder,
"_".join([tss, "TSS.gff"]))
elif args_tss.program.lower() == "processing":
compare_file = os.path.join(self.gff_outfolder,
"_".join([tss, "processing.gff"]))
validate_gff(compare_file, os.path.join(args_tss.gffs, gff),
stat_file, out_cds_file, args_tss.utr_length,
args_tss.program.lower())
shutil.move(out_cds_file, os.path.join(args_tss.gffs, gff))
def _compare_ta(self, tsss, args_tss):
    '''compare TSS with transcript assembly

    The transcript files are parsed and combined first. Then, for every
    name in tsss which has a "NAME_transcript.gff" file in
    self.tmps["ta"], stat_ta_tss is called and the two temporary files
    self.tmps["tss_ta"] and self.tmps["ta_tss"] are sorted into the TSS
    file and the transcript file before they are deleted.
    '''
    print("Running compare transcript assembly and TSS ...")
    self.multiparser.parser_gff(args_tss.ta_files, "transcript")
    self.multiparser.combine_gff(args_tss.gffs, self.tmps["ta"],
                                 None, "transcript")
    for tss in tsss:
        # An exact file name test replaces split("_transcript"), which
        # raised IndexError for files without "_transcript" in the name.
        ta = "_".join([tss, "transcript.gff"])
        if ta not in os.listdir(self.tmps["ta"]):
            continue
        stat_out = os.path.join(
            self.stat_outfolder, tss, "".join([
                "stat_compare_TSS_Transcriptome_assembly_",
                tss, ".csv"]))
        compare_file = os.path.join(self.gff_outfolder,
                                    "_".join([tss, "TSS.gff"]))
        stat_ta_tss(os.path.join(self.tmps["ta"], ta), compare_file,
                    stat_out, self.tmps["ta_tss"],
                    self.tmps["tss_ta"], args_tss.fuzzy)
        self.helper.sort_gff(self.tmps["tss_ta"], compare_file)
        self.helper.sort_gff(self.tmps["ta_tss"],
                             os.path.join(args_tss.ta_files, ta))
        os.remove(self.tmps["tss_ta"])
        os.remove(self.tmps["ta_tss"])
def _stat_tss(self, tsss, feature):
print("Running statistaics.....")
for tss in tsss:
compare_file = os.path.join(self.gff_outfolder,
"_".join([tss, feature]) + ".gff")
stat_tsspredator(
compare_file, feature,
os.path.join(self.stat_outfolder, tss, "_".join([
示例15: TranscriptAssembly
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_gff [as 别名]
class TranscriptAssembly(object):
def __init__(self, args_tran):
    '''Store the helper objects, the output paths which are derived
    from args_tran.out_folder, the names of the temporary files and
    the file name endings of the transcript files.'''
    self.multiparser = Multiparser()
    self.helper = Helper()
    self.converter = Converter()
    self.gff_outfolder = os.path.join(args_tran.out_folder, "gffs")
    self.tran_path = os.path.join(self.gff_outfolder, "tmp")
    self.stat_path = os.path.join(args_tran.out_folder, "statistics")
    # "gff" and "merge" are relative names, all others are full paths.
    self.tmps = {"gff": "tmp.gff", "merge": "tmp_merge",
                 "tran": os.path.join(args_tran.out_folder, "tmp_tran"),
                 "tss_ta": os.path.join(self.gff_outfolder, "tmp_tss_ta"),
                 "ta_tss": os.path.join(self.gff_outfolder, "tmp_ta_tss"),
                 "ta_gff": os.path.join(self.gff_outfolder, "tmp_ta_gff"),
                 "gff_ta": os.path.join(self.gff_outfolder, "tmp_gff_ta"),
                 "uni": os.path.join(self.gff_outfolder, "tmp_uni"),
                 "overlap": os.path.join(
                     self.gff_outfolder, "tmp_overlap")}
    self.frag = "transcript_assembly_fragment.gff"
    self.tex = "transcript_assembly_tex_notex.gff"
    self.endfix_tran = "transcript.gff"
def _compute_transcript(self, wig_f, wig_r, wig_folder, wig_type, strain,
                        libs, args_tran):
    '''Call assembly() for one strain; the output path which is passed
    to it is "STRAIN_WIGTYPE" in args_tran.out_folder.'''
    print("Computing transcript assembly for {0}...".format(strain))
    out = os.path.join(args_tran.out_folder, "_".join([strain, wig_type]))
    assembly(wig_f, wig_r, wig_folder, libs, out, wig_type, args_tran)
def _compute(self, wig_type, wigs, libs, args_tran):
strains = []
wig_folder = os.path.join(wigs, "tmp")
for wig in os.listdir(wig_folder):
if wig.endswith("_forward.wig"):
strains.append(wig.replace("_forward.wig", ""))
for strain in strains:
f_file = os.path.join(wig_folder, "_".join(
[strain, "forward.wig"]))
r_file = os.path.join(wig_folder, "_".join(
[strain, "reverse.wig"]))
self._compute_transcript(f_file, r_file, wigs, wig_type,
strain, libs, args_tran)
return strains
def _compare_tss(self, tas, args_tran):
    '''Compare the transcript files with the TSS files.

    The TSS files are parsed and combined first. For every name in tas
    and every matching "*.gff" file of the TSS "tmp" folder,
    stat_ta_tss is called; the transcript file and the temporary TSS
    file are deleted, and the temporary outputs are sorted into the
    transcript file and into the TSS file of args_tran.compare_tss
    before they are deleted.
    '''
    tss_folder = os.path.join(args_tran.compare_tss, "tmp")
    self.multiparser.parser_gff(args_tran.compare_tss, "TSS")
    self.multiparser.combine_gff(
        self.gff_outfolder, tss_folder, "transcript", "TSS")
    print("Comaring of Transcript assembly and TSS file...")
    for ta in tas:
        ta_file = os.path.join(self.gff_outfolder,
                               "_".join([ta, self.endfix_tran]))
        stat_tss_out = os.path.join(
            self.stat_path, "".join([
                "stat_compare_Transcriptome_assembly_TSS_",
                ta, ".csv"]))
        for tss in os.listdir(tss_folder):
            filename = tss.split("_TSS")
            if (filename[0] == ta) and (tss.endswith(".gff")):
                stat_ta_tss(ta_file, os.path.join(tss_folder, tss),
                            stat_tss_out, self.tmps["ta_tss"],
                            self.tmps["tss_ta"], args_tran.fuzzy)
                os.remove(ta_file)
                os.remove(os.path.join(tss_folder, tss))
                self.helper.sort_gff(self.tmps["ta_tss"], ta_file)
                self.helper.sort_gff(
                    self.tmps["tss_ta"], os.path.join(
                        args_tran.compare_tss, tss))
                os.remove(self.tmps["tss_ta"])
                os.remove(self.tmps["ta_tss"])
def _compare_cds(self, tas, args_tran):
    '''Compare the transcript files with the annotation files.

    The annotation files are parsed and combined first. For every name
    in tas and the "NAME.gff" file of the annotation "tmp" folder,
    stat_ta_gff is called; the transcript file and the annotation file
    of args_tran.compare_cds are deleted and written again from the
    sorted temporary outputs, which are deleted afterwards.
    '''
    cds_folder = os.path.join(args_tran.compare_cds, "tmp")
    self.multiparser.parser_gff(args_tran.compare_cds, None)
    self.multiparser.combine_gff(
        self.gff_outfolder, cds_folder, "transcript", None)
    print("Comaring of Transcript assembly and gene...")
    for ta in tas:
        ta_file = os.path.join(self.gff_outfolder,
                               "_".join([ta, self.endfix_tran]))
        stat_gff_out = os.path.join(self.stat_path, "".join([
            "stat_compare_Transcriptome_assembly_gene_", ta, ".csv"]))
        for gff in os.listdir(cds_folder):
            if (gff[:-4] == ta) and (gff.endswith(".gff")):
                cds_file = os.path.join(cds_folder, gff)
                stat_ta_gff(ta_file, cds_file, stat_gff_out,
                            self.tmps["ta_gff"], self.tmps["gff_ta"],
                            args_tran.c_feature)
                os.remove(ta_file)
                os.remove(os.path.join(args_tran.compare_cds, gff))
                self.helper.sort_gff(self.tmps["ta_gff"], ta_file)
                self.helper.sort_gff(self.tmps["gff_ta"], os.path.join(
                    args_tran.compare_cds, gff))
                os.remove(self.tmps["ta_gff"])
                os.remove(self.tmps["gff_ta"])
def _compare_tss_cds(self, tas, args_tran):
#.........这里部分代码省略.........