本文整理汇总了Python中annogesiclib.helper.Helper.sort_gff方法的典型用法代码示例。如果您正苦于以下问题:Python Helper.sort_gff方法的具体用法?Python Helper.sort_gff怎么用?Python Helper.sort_gff使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类annogesiclib.helper.Helper的用法示例。
在下文中一共展示了Helper.sort_gff方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: TestHelper
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]
class TestHelper(unittest.TestCase):
    """Exercise the file, GFF and sequence utilities offered by ``Helper``."""

    def setUp(self):
        """Build a scratch folder containing one GFF and one FASTA fixture."""
        self.example = ExampleData()
        self.helper = Helper()
        self.gff_out = self.example.gff_out
        self.rev_seq = self.example.rev_seq.replace("\n", "")
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)
        self.gff_file = os.path.join(self.test_folder, "test.gff")
        self.seq_file = os.path.join(self.test_folder, "test.fa")
        fixtures = ((self.gff_file, self.example.gff_file),
                    (self.seq_file, self.example.seq))
        for path, content in fixtures:
            with open(path, "w") as handle:
                handle.write(content)

    def tearDown(self):
        """Delete the scratch folder together with everything in it."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def test_remove_all_content(self):
        """"file" mode removes only the file, "dir" mode only the folder."""
        tmp_file = os.path.join(self.test_folder, "tmp1.gff")
        tmp_dir = os.path.join(self.test_folder, "tmp2")
        shutil.copyfile(self.gff_file, tmp_file)
        os.mkdir(tmp_dir)
        # First pass: the file goes away, the folder stays.
        self.helper.remove_all_content(self.test_folder, "tmp", "file")
        self.assertFalse(os.path.exists(tmp_file))
        self.assertTrue(os.path.exists(tmp_dir))
        # Second pass: the folder goes away, the unrelated fixture stays.
        self.helper.remove_all_content(self.test_folder, "tmp", "dir")
        self.assertFalse(os.path.exists(tmp_dir))
        self.assertTrue(os.path.exists(self.gff_file))

    def test_remove_tmp(self):
        """Both the "tmp" and the "*_folder" directories are removed."""
        doomed = [os.path.join(self.test_folder, name)
                  for name in ("tmp", "test.gff_folder")]
        for folder in doomed:
            os.mkdir(folder)
        self.helper.remove_tmp(self.test_folder)
        for folder in doomed:
            self.assertFalse(os.path.exists(folder))

    def test_get_correct_file(self):
        """The GFF with the requested prefix and suffix is returned."""
        expected = os.path.join(self.test_folder, "test.gff")
        forward_wig = os.path.join(self.test_folder,
                                   "test_forward.wig_STRAIN_aaa.wig")
        reverse_wig = os.path.join(self.test_folder,
                                   "test_reverse.wig_STRAIN_aaa.wig")
        shutil.copyfile(expected, forward_wig)
        shutil.copyfile(expected, reverse_wig)
        libs = ["test_forward.wig_STRAIN_aaa.wig:frag:1:a:+",
                "test_reverse.wig_STRAIN_aaa.wig:frag:1:a:-"]
        found = self.helper.get_correct_file(
            self.test_folder, ".gff", "test", None, libs)
        self.assertEqual(found, expected)

    def test_sorf_gff(self):
        """sort_gff writes the same set of lines as the reference output."""
        sorted_path = os.path.join(self.test_folder, "test.out")
        self.helper.sort_gff(self.gff_file, sorted_path)
        produced = set(import_data(sorted_path))
        self.assertEqual(produced, set(self.gff_out.split("\n")))

    def test_extract_gene(self):
        """Forward and reverse-strand extraction return the expected bases."""
        genome = self.example.seq.replace("\n", "")
        forward = self.helper.extract_gene(genome, 1, 70, "+")
        self.assertEqual(
            forward,
            "CGCAGGTTGAGTTCCTGTTCCCGATAGATCCGATAAACCCGCTTATGATTCCAGAGCTGTCCCTGCACAT")
        reverse = self.helper.extract_gene(genome, 1, 140, "-")
        self.assertEqual(reverse, self.rev_seq)

    def test_get_seq(self):
        """get_seq emits a header plus sequence for a single GFF entry."""
        gff_path = os.path.join(self.test_folder, "test.gff")
        cds_path = os.path.join(self.test_folder, "test.cds")
        entry = self.example.gff_out.split("\n")[1]
        # Overwrite the fixture so that it holds exactly one feature line.
        with open(gff_path, "w") as handle:
            handle.write(entry)
        self.helper.get_seq(self.gff_file, self.seq_file, cds_path)
        produced = set(import_data(cds_path))
        self.assertEqual(produced, {">cds0|aaa|1|10|+", "CGCAGGTTGA"})
示例2: Terminator
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]
#.........这里部分代码省略.........
for wig in os.listdir(args_term.tex_wigs):
if os.path.isdir(os.path.join(args_term.tex_wigs, wig)):
pass
else:
shutil.copy(os.path.join(args_term.tex_wigs, wig),
merge_wigs)
for wig in os.listdir(args_term.frag_wigs):
if os.path.isdir(os.path.join(args_term.frag_wigs, wig)):
pass
else:
shutil.copy(os.path.join(args_term.frag_wigs, wig),
merge_wigs)
elif (args_term.tex_wigs is not None):
merge_wigs = args_term.tex_wigs
elif (args_term.frag_wigs is not None):
merge_wigs = args_term.frag_wigs
else:
print("Error: no proper wig files!!!")
sys.exit()
return merge_wigs
def _merge_sRNA(self, sRNAs, prefixs, gff_path):
    """Merge each annotation GFF with its sRNA GFF and return the folder to use.

    When *sRNAs* is None nothing is merged and *gff_path* is returned as is.
    Otherwise, for every prefix, ``<gff_path>/<prefix>.gff`` and
    ``<self.srna_path>/<prefix>_sRNA.gff`` are appended to a scratch file,
    which is then sorted into ``<merge folder>/<prefix>.gff``; the merge
    folder (``self.tmps["merge"]``) is returned.
    """
    if sRNAs is None:
        return gff_path
    self.multiparser.parser_gff(sRNAs, "sRNA")
    self.helper.check_make_folder(self.tmps["merge"])
    for prefix in prefixs:
        scratch = os.path.join(self.tmps["merge"], self.tmps["gff"])
        # Drop a scratch file left over from an earlier prefix or run.
        if self.tmps["gff"] in os.listdir(self.tmps["merge"]):
            os.remove(scratch)
        annotation = os.path.join(gff_path, prefix + ".gff")
        srna = os.path.join(self.srna_path, "_".join([prefix, "sRNA.gff"]))
        self.helper.merge_file(annotation, scratch)
        self.helper.merge_file(srna, scratch)
        merged = os.path.join(self.tmps["merge"], prefix + ".gff")
        self.helper.sort_gff(scratch, merged)
        os.remove(scratch)
    return self.tmps["merge"]
def _move_file(self, term_outfolder, csv_outfolder):
for gff in os.listdir(term_outfolder):
if gff.endswith("_term.gff"):
self.helper.sort_gff(os.path.join(term_outfolder, gff),
self.tmps["gff"])
shutil.move(self.tmps["gff"],
os.path.join(term_outfolder, gff))
prefix = gff.replace("_term.gff", "")
new_gff = os.path.join(self.terms["all"], "_".join([
prefix, self.suffixs["allgff"]]))
csv_file = os.path.join(
os.path.join(self.csvs["all"], "_".join([
prefix, self.suffixs["csv"]])))
out = open(new_gff, "w")
out.write("##gff-version 3\n")
out.close()
self.helper.merge_file(
os.path.join(term_outfolder, gff),
os.path.join(
self.terms["all"], "_".join([
prefix, self.suffixs["allgff"]])))
os.remove(os.path.join(term_outfolder, gff))
pre_strain = ""
if ("_".join([prefix, self.suffixs["csv"]]) in
os.listdir(self.csvs["all"])):
示例3: TranscriptDetection
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]
class TranscriptDetection(object):
'''doing for transcript detection'''
def __init__(self, args_tran):
    """Create the helper objects and derive output and scratch paths.

    Every path is built from ``args_tran.out_folder``.
    """
    self.multiparser = Multiparser()
    self.helper = Helper()
    self.converter = Converter()
    out_folder = args_tran.out_folder
    gff_folder = os.path.join(out_folder, "gffs")
    self.gff_outfolder = gff_folder
    self.tran_path = os.path.join(gff_folder, "tmp")
    self.stat_path = os.path.join(out_folder, "statistics")
    # Scratch names: the first two are bare names, the rest are full paths.
    self.tmps = {
        "gff": "tmp.gff",
        "merge": "tmp_merge",
        "tran": os.path.join(out_folder, "tmp_tran"),
        "tss_ta": os.path.join(gff_folder, "tmp_tss_ta"),
        "ta_tss": os.path.join(gff_folder, "tmp_ta_tss"),
        "ta_gff": os.path.join(gff_folder, "tmp_ta_gff"),
        "gff_ta": os.path.join(gff_folder, "tmp_gff_ta"),
        "uni": os.path.join(gff_folder, "tmp_uni"),
        "overlap": os.path.join(gff_folder, "tmp_overlap"),
    }
    # File-name endings used for the different transcript GFFs.
    self.frag = "transcript_fragment.gff"
    self.tex = "transcript_tex_notex.gff"
    self.endfix_tran = "transcript.gff"
def _compute_transcript(self, wig_f, wig_r, wig_folder, wig_type, strain,
                        libs, args_tran):
    """Announce the strain and run detect_transcript on its wiggle pair.

    The output prefix handed to detect_transcript is
    ``<args_tran.out_folder>/<strain>_<wig_type>``.
    """
    print("Computing transcripts for {0}".format(strain))
    out_prefix = os.path.join(args_tran.out_folder,
                              "_".join((strain, wig_type)))
    detect_transcript(wig_f, wig_r, wig_folder, libs, out_prefix,
                      wig_type, args_tran)
def _compute(self, wig_type, wigs, libs, args_tran):
    """Run the transcript computation for every strain under ``<wigs>/tmp``.

    A strain is any file name ending in ``_forward.wig`` with that ending
    removed. Returns the strain names in directory-listing order.
    """
    wig_folder = os.path.join(wigs, "tmp")
    strains = [name.replace("_forward.wig", "")
               for name in os.listdir(wig_folder)
               if name.endswith("_forward.wig")]
    for strain in strains:
        forward = os.path.join(wig_folder, "_".join([strain, "forward.wig"]))
        reverse = os.path.join(wig_folder, "_".join([strain, "reverse.wig"]))
        self._compute_transcript(forward, reverse, wigs, wig_type,
                                 strain, libs, args_tran)
    return strains
def _compare_tss(self, tas, args_tran, log):
# Compare the transcript GFF of every prefix in ``tas`` with the TSS GFFs
# listed in ``<args_tran.compare_tss>/tmp`` by calling stat_ta_tss, and write
# the path of each statistics CSV to ``log``.
# NOTE(review): indentation was stripped from this listing, so the nesting
# described below is inferred from the statements — confirm against upstream.
self.multiparser.parser_gff(args_tran.compare_tss, "TSS")
self.multiparser.combine_gff(
self.gff_outfolder,
os.path.join(args_tran.compare_tss, "tmp"),
"transcript", "TSS")
print("Comaring of transcripts and TSSs")
log.write("Running stat_TA_comparison.py to compare transcripts "
"with TSSs.\n")
tss_folder = os.path.join(args_tran.compare_tss, "tmp")
for ta in tas:
# Transcript file: <gff_outfolder>/<ta>_<endfix_tran>.
ta_file = os.path.join(self.gff_outfolder,
"_".join([ta, self.endfix_tran]))
# Statistics file: <stat_path>/stat_compare_transcript_TSS_<ta>.csv.
stat_tss_out = os.path.join(
self.stat_path, "".join([
"stat_compare_transcript_TSS_",
ta, ".csv"]))
for tss in os.listdir(tss_folder):
# A TSS file matches when the text before "_TSS" equals the prefix and the
# name ends with ".gff".
filename = tss.split("_TSS")
if (filename[0] == ta) and (tss.endswith(".gff")):
stat_ta_tss(ta_file, os.path.join(tss_folder, tss),
stat_tss_out, self.tmps["ta_tss"],
self.tmps["tss_ta"], args_tran.fuzzy)
# Both compared inputs are deleted; the two temporary files (presumably
# written by stat_ta_tss — confirm) are then sorted into their place.
os.remove(ta_file)
os.remove(os.path.join(tss_folder, tss))
self.helper.sort_gff(self.tmps["ta_tss"], ta_file)
# The sorted TSS file is written to args_tran.compare_tss itself, not to
# its "tmp" sub-folder.
self.helper.sort_gff(
self.tmps["tss_ta"], os.path.join(
args_tran.compare_tss, tss))
os.remove(self.tmps["tss_ta"])
os.remove(self.tmps["ta_tss"])
log.write("\t" + stat_tss_out + "\n")
def _compare_cds(self, tas, args_tran, log):
self.multiparser.parser_gff(args_tran.gffs, None)
self.multiparser.combine_gff(
self.gff_outfolder, os.path.join(args_tran.gffs, "tmp"),
"transcript", None)
print("Comaring of transcripts and genome annotations")
cds_folder = os.path.join(args_tran.gffs, "tmp")
log.write("Running stat_TA_comparison.py to compare transcripts "
"with genome annotations.\n")
for ta in tas:
ta_file = os.path.join(self.gff_outfolder,
"_".join([ta, self.endfix_tran]))
stat_gff_out = os.path.join(self.stat_path, "".join([
"stat_compare_transcript_genome_", ta, ".csv"]))
for gff in os.listdir(cds_folder):
if (gff[:-4] == ta) and (gff.endswith(".gff")):
cds_file = os.path.join(cds_folder, gff)
stat_ta_gff(ta_file, cds_file, stat_gff_out,
self.tmps["ta_gff"], self.tmps["gff_ta"],
args_tran.c_feature)
os.remove(ta_file)
os.remove(os.path.join(args_tran.gffs, gff))
self.helper.sort_gff(self.tmps["ta_gff"], ta_file)
#.........这里部分代码省略.........
示例4: sRNADetection
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]
class sRNADetection(object):
def __init__(self, args_srna):
    """Create the helper objects and derive every path used for sRNA detection.

    Output, table and statistics folders as well as all scratch prefixes
    live below ``args_srna.out_folder``. Optional input folders
    (``tss_folder``, ``pro_folder``, ``sorf_file``, ``terms``) are mapped to
    their ``tmp`` sub-folder, or to None when not given.
    """
    self.args_container = ArgsContainer()
    self.helper = Helper()
    self.multiparser = Multiparser()
    out_folder = args_srna.out_folder

    def in_out_folder(name):
        # Every derived location sits directly below the output folder.
        return os.path.join(out_folder, name)

    self.gff_output = in_out_folder("gffs")
    # table_output and stat_path used to be assigned a second time further
    # down with identical values; they are now set exactly once.
    self.table_output = in_out_folder("tables")
    self.stat_path = in_out_folder("statistics")
    self.tss_path = self._check_folder_exist(args_srna.tss_folder)
    self.pro_path = self._check_folder_exist(args_srna.pro_folder)
    self.sorf_path = self._check_folder_exist(args_srna.sorf_file)
    self.fasta_path = os.path.join(args_srna.fastas, "tmp")
    self.tran_path = os.path.join(args_srna.trans, "tmp")
    self.term_path = self._check_folder_exist(args_srna.terms)
    self.merge_wigs = in_out_folder("merge_wigs")
    # Prefixes of the intermediate GFFs and tables.
    self.prefixs = {
        "merge": in_out_folder("tmp_merge"),
        "utr": in_out_folder("tmp_utrsrna"),
        "normal": in_out_folder("tmp_normal"),
        "in_cds": in_out_folder("tmp_incds"),
        "merge_table": in_out_folder("tmp_merge_table"),
        "utr_table": in_out_folder("tmp_utrsrna_table"),
        "normal_table": in_out_folder("tmp_normal_table"),
        "in_cds_table": in_out_folder("tmp_incds_table"),
        "basic": in_out_folder("tmp_basic"),
        "energy": in_out_folder("tmp_energy"),
    }
    self.tmps = {"nr": in_out_folder("tmp_nr"),
                 "srna": in_out_folder("tmp_sRNA")}
    self.best_table = os.path.join(self.table_output, "best")
    # Final result locations for all candidates and for the best ones.
    self.all_best = {
        "all_gff": os.path.join(self.gff_output, "all_candidates"),
        "best_gff": os.path.join(self.gff_output, "best"),
        "all_table": os.path.join(self.table_output, "all_candidates"),
        "best_table": os.path.join(self.table_output, "best"),
    }
def _check_folder_exist(self, folder):
    """Return ``<folder>/tmp``, or None when *folder* is None.

    Despite the name, nothing on disk is checked; only the path is built.
    """
    return None if folder is None else os.path.join(folder, "tmp")
def _check_gff(self, gffs):
    """Run ``helper.check_uni_attributes`` on every ``*.gff`` file in *gffs*."""
    for entry in os.listdir(gffs):
        if not entry.endswith(".gff"):
            continue
        self.helper.check_uni_attributes(os.path.join(gffs, entry))
def _run_format(self, blast_path, database, type_, db_file, err):
    """Invoke ``makeblastdb`` from *blast_path* to build *db_file*.

    *database* is passed as ``-in``, *type_* as ``-dbtype`` and *db_file* as
    ``-out``; the program's stderr is redirected to *err*.
    """
    command = [
        os.path.join(blast_path, "makeblastdb"),
        "-in", database,
        "-dbtype", type_,
        "-out", db_file,
    ]
    call(command, stderr=err)
def _formatdb(self, database, type_, out_folder,
              blast_path, database_type):
    """Locate the FASTA behind *database* and build a BLAST database from it.

    *database* may name a FASTA file directly (``.fa``, ``.fna`` or
    ``.fasta``) or be given without extension; in the latter case its
    folder is searched for a FASTA file with that base name. For
    ``database_type == "sRNA"`` the FASTA is rewritten in place through
    change_format first. Messages from makeblastdb go to
    ``<out_folder>/log.txt``.

    NOTE(review): the nesting was reconstructed from a listing without
    indentation — confirm against the upstream file.
    """
    fasta_exts = (".fa", ".fna", ".fasta")
    # The context manager closes the log even when a step below raises;
    # previously the handle leaked on any error before the final close().
    with open(os.path.join(out_folder, "log.txt"), "w") as err:
        if not database.endswith(fasta_exts):
            # os.path.split also copes with a bare file name or a
            # root-level path, for which splitting on "/" produced an
            # empty folder that os.listdir() rejects.
            folder, filename = os.path.split(database)
            folder = folder or "."
            for fasta in os.listdir(folder):
                if not fasta.endswith(fasta_exts):
                    continue
                if ".".join(fasta.split(".")[:-1]) == filename:
                    database = os.path.join(folder, fasta)
        if database_type == "sRNA":
            change_format(database, "tmp_srna_database")
            os.remove(database)
            shutil.move("tmp_srna_database", database)
        # The database name is the FASTA path without its last extension.
        db_file = ".".join(database.split(".")[:-1])
        self._run_format(blast_path, database, type_, db_file, err)
def _merge_frag_tex_file(self, files, args_srna):
if (args_srna.frag_wigs is not None) and (
args_srna.tex_wigs is not None):
self.helper.merge_file(files["frag_gff"], files["tex_gff"])
self.helper.merge_file(files["frag_csv"], files["tex_csv"])
shutil.move(files["tex_csv"], files["merge_csv"])
self.helper.sort_gff(files["tex_gff"], files["merge_gff"])
os.remove(files["frag_csv"])
os.remove(files["frag_gff"])
os.remove(files["tex_gff"])
elif (args_srna.frag_wigs is not None):
#.........这里部分代码省略.........
示例5: Multiparser
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]
#.........这里部分代码省略.........
first = False
else:
out.close()
out_t.close()
out = open(os.path.join(
out_path, name + ".fa"), "w")
out_t = open(os.path.join(
par_tmp, name + ".fa"), "w")
out.write(">" + name + "\n")
out_t.write(">" + name + "\n")
else:
out.write(line)
out_t.write(line)
out.close()
out_t.close()
def parser_gff(self, gff_folder, feature):
    """Split every GFF file in *gff_folder* into one file per sequence id.

    Each file whose name contains ".gff" (and neither "_folder" nor equals
    "tmp") is sorted with ``helper.sort_gff`` into a scratch file and then
    written, row by row, to ``<gff_folder>/<filename>_folder/<seq_id><suffix>.gff``
    and to ``<gff_folder>/tmp/<seq_id><suffix>.gff``. The suffix is
    ``"_" + feature``, or empty when *feature* is None. Lines whose first
    column starts with "#" are dropped.

    NOTE(review): the nesting was reconstructed from a listing without
    indentation — confirm against the upstream file.
    """
    par_tmp = os.path.join(gff_folder, "tmp")
    tmp_gff = os.path.join(gff_folder, "tmp.gff")
    suffix = "" if feature is None else "_" + feature
    out = None
    out_t = None
    self.helper.check_make_folder(par_tmp)
    try:
        for filename in os.listdir(gff_folder):
            if ("_folder" in filename) or (filename == "tmp"):
                continue
            if ".gff" not in filename:
                continue
            out_path = os.path.join(gff_folder, filename + "_folder")
            print("Parser " + filename + "...")
            self.helper.check_make_folder(out_path)
            self.helper.sort_gff(os.path.join(gff_folder, filename), tmp_gff)
            # Reset per input file so that its first row always opens a
            # fresh pair of output files.
            pre_seq_id = ""
            with open(tmp_gff, "r") as f_h:
                for row in csv.reader(f_h, delimiter="\t"):
                    if row[0].startswith("#"):
                        continue
                    if pre_seq_id != row[0]:
                        # New sequence id: switch to a new pair of outputs.
                        if out is not None:
                            out.close()
                            out_t.close()
                        out = open(os.path.join(
                            out_path, row[0] + suffix + ".gff"), "w")
                        out_t = open(os.path.join(
                            par_tmp, row[0] + suffix + ".gff"), "w")
                        pre_seq_id = row[0]
                    line = "\t".join(row) + "\n"
                    out.write(line)
                    out_t.write(line)
    finally:
        # The outputs stay None when no feature row was seen at all; the
        # unconditional close() used to raise AttributeError in that case.
        if out is not None:
            out.close()
        if out_t is not None:
            out_t.close()
        if os.path.exists(tmp_gff):
            os.remove(tmp_gff)
def parser_wig(self, wig_folder):
par_tmp = os.path.join(wig_folder, "tmp")
first = True
out = None
示例6: Terminator
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]
#.........这里部分代码省略.........
if os.path.isdir(os.path.join(args_term.tex_wigs, wig)):
pass
else:
shutil.copy(os.path.join(args_term.tex_wigs, wig),
merge_wigs)
for wig in os.listdir(args_term.frag_wigs):
if os.path.isdir(os.path.join(args_term.frag_wigs, wig)):
pass
else:
shutil.copy(os.path.join(args_term.frag_wigs, wig),
merge_wigs)
elif (args_term.tex_wigs is not None):
merge_wigs = args_term.tex_wigs
elif (args_term.frag_wigs is not None):
merge_wigs = args_term.frag_wigs
else:
print("Error: Wiggle files are not assigned!")
sys.exit()
return merge_wigs
def _merge_sRNA(self, sRNAs, prefixs, gff_path):
    '''Pick the annotation folder, merging in sRNAs when they are given.

    Without sRNAs the function returns gff_path untouched. With sRNAs,
    each <prefix>.gff from gff_path is concatenated with <prefix>_sRNA.gff
    from self.srna_path, sorted into the merge folder, and that folder
    (self.tmps["merge"]) is returned.
    '''
    if sRNAs is None:
        return gff_path
    self.multiparser.parser_gff(sRNAs, "sRNA")
    self.helper.check_make_folder(self.tmps["merge"])
    for prefix in prefixs:
        tmp_gff = os.path.join(self.tmps["merge"], self.tmps["gff"])
        # Start every prefix from an empty scratch file.
        if self.tmps["gff"] in os.listdir(self.tmps["merge"]):
            os.remove(tmp_gff)
        sources = (
            os.path.join(gff_path, prefix + ".gff"),
            os.path.join(self.srna_path, "_".join([prefix, "sRNA.gff"])),
        )
        for source in sources:
            self.helper.merge_file(source, tmp_gff)
        self.helper.sort_gff(
            tmp_gff, os.path.join(self.tmps["merge"], prefix + ".gff"))
        os.remove(tmp_gff)
    merge_path = self.tmps["merge"]
    return merge_path
def _move_file(self, term_outfolder, csv_outfolder):
for gff in os.listdir(term_outfolder):
if gff.endswith("_term.gff"):
self.helper.sort_gff(os.path.join(term_outfolder, gff),
self.tmps["gff"])
shutil.move(self.tmps["gff"],
os.path.join(term_outfolder, gff))
prefix = gff.replace("_term.gff", "")
new_gff = os.path.join(self.terms["all"], "_".join([
prefix, self.suffixs["allgff"]]))
csv_file = os.path.join(
os.path.join(self.csvs["all"], "_".join([
prefix, self.suffixs["csv"]])))
out = open(new_gff, "w")
out.write("##gff-version 3\n")
out.close()
self.helper.merge_file(
os.path.join(term_outfolder, gff),
os.path.join(
self.terms["all"], "_".join([
prefix, self.suffixs["allgff"]])))
os.remove(os.path.join(term_outfolder, gff))
pre_strain = ""
if ("_".join([prefix, self.suffixs["csv"]]) in
os.listdir(self.csvs["all"])):
示例7: TSSpredator
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]
#.........这里部分代码省略.........
compare_file = os.path.join(self.gff_outfolder,
"_".join([tss, "processing.gff"]))
validate_gff(compare_file, os.path.join(args_tss.gffs, gff),
stat_file, out_cds_file, args_tss.utr_length,
args_tss.program.lower())
log.write("\t" + stat_file + " is generated.\n")
shutil.move(out_cds_file, os.path.join(args_tss.gffs, gff))
def _compare_ta(self, tsss, args_tss, log):
'''compare TSS with transcript'''
# For every prefix in ``tsss`` the matching transcript GFF is looked up in
# self.tmps["ta"] and, when found, compared with <gff_outfolder>/<tss>_TSS.gff
# through stat_ta_tss.
# NOTE(review): indentation was stripped from this listing, so the nesting
# described below is inferred from the statements — confirm against upstream.
detect = False
log.write("Running stat_TA_comparison to compare transcripts "
"and TSSs/PSs.\n")
print("Comparing transcripts and TSSs")
self.multiparser.parser_gff(args_tss.ta_files, "transcript")
self.multiparser.combine_gff(args_tss.gffs, self.tmps["ta"],
None, "transcript")
for tss in tsss:
# Statistics file: <stat_outfolder>/<tss>/stat_compare_TSS_transcript_<tss>.csv.
stat_out = os.path.join(
self.stat_outfolder, tss, "".join([
"stat_compare_TSS_transcript_",
tss, ".csv"]))
for ta in os.listdir(self.tmps["ta"]):
# Match "<tss>_transcript.gff". filename[1] is only evaluated when
# filename[0] == tss; a name without "_transcript" that equals tss would
# raise IndexError here.
filename = ta.split("_transcript")
if (filename[0] == tss) and (filename[1] == ".gff"):
detect = True
break
compare_file = os.path.join(self.gff_outfolder,
"_".join([tss, "TSS.gff"]))
if detect:
# ``ta`` still holds the file name on which the search loop stopped.
stat_ta_tss(os.path.join(self.tmps["ta"], ta), compare_file,
stat_out, self.tmps["ta_tss"],
self.tmps["tss_ta"], args_tss.fuzzy)
# The two temporary files (presumably written by stat_ta_tss — confirm) are
# sorted over the TSS file and into args_tss.ta_files, then deleted.
self.helper.sort_gff(self.tmps["tss_ta"], compare_file)
self.helper.sort_gff(self.tmps["ta_tss"],
os.path.join(args_tss.ta_files, ta))
os.remove(self.tmps["tss_ta"])
os.remove(self.tmps["ta_tss"])
# Reset the flag before the next prefix is searched.
detect = False
log.write("\t" + stat_out + " is generated.\n")
def _stat_tss(self, tsss, feature, log):
print("Running statistaics")
for tss in tsss:
compare_file = os.path.join(self.gff_outfolder,
"_".join([tss, feature]) + ".gff")
stat_tsspredator(
compare_file, feature,
os.path.join(self.stat_outfolder, tss, "_".join([
"stat", feature, "class", tss]) + ".csv"),
os.path.join(self.stat_outfolder, tss, "_".join([
"stat", feature, "libs", tss]) + ".csv"))
self.helper.move_all_content(os.getcwd(), os.path.join(
self.stat_outfolder, tss), ["_class", ".png"])
if os.path.exists(os.path.join(
self.stat_outfolder, "TSSstatistics.tsv")):
shutil.move(
os.path.join(
self.stat_outfolder, "TSSstatistics.tsv"),
os.path.join(
self.stat_outfolder, tss, "TSSstatistics.tsv"))
plot_venn(compare_file, feature)
self.helper.move_all_content(os.getcwd(), os.path.join(
self.stat_outfolder, tss), ["_venn", ".png"])
log.write("The following files in {0} are generated:\n".format(
(os.path.join(self.stat_outfolder, tss))))
示例8: TSSpredator
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]
#.........这里部分代码省略.........
if args_tss.program.lower() == "tss":
compare_file = os.path.join(self.gff_outfolder,
"_".join([tss, "TSS.gff"]))
elif args_tss.program.lower() == "processing":
compare_file = os.path.join(self.gff_outfolder,
"_".join([tss, "processing.gff"]))
validate_gff(compare_file, os.path.join(args_tss.gffs, gff),
stat_file, out_cds_file, args_tss.utr_length,
args_tss.program.lower())
shutil.move(out_cds_file, os.path.join(args_tss.gffs, gff))
def _compare_ta(self, tsss, args_tss):
# For every prefix in ``tsss`` the matching transcript GFF is looked up in
# self.tmps["ta"] and, when found, compared with <gff_outfolder>/<tss>_TSS.gff
# through stat_ta_tss.
# NOTE(review): indentation was stripped from this listing, so the nesting
# described below is inferred from the statements — confirm against upstream.
detect = False
print("Running compare transcript assembly and TSS ...")
self.multiparser.parser_gff(args_tss.ta_files, "transcript")
self.multiparser.combine_gff(args_tss.gffs, self.tmps["ta"],
None, "transcript")
for tss in tsss:
# Statistics file:
# <stat_outfolder>/<tss>/stat_compare_TSS_Transcriptome_assembly_<tss>.csv.
stat_out = os.path.join(
self.stat_outfolder, tss, "".join([
"stat_compare_TSS_Transcriptome_assembly_",
tss, ".csv"]))
for ta in os.listdir(self.tmps["ta"]):
# Match "<tss>_transcript.gff". filename[1] is only evaluated when
# filename[0] == tss; a name without "_transcript" that equals tss would
# raise IndexError here.
filename = ta.split("_transcript")
if (filename[0] == tss) and (filename[1] == ".gff"):
detect = True
break
compare_file = os.path.join(self.gff_outfolder,
"_".join([tss, "TSS.gff"]))
if detect:
# ``ta`` still holds the file name on which the search loop stopped.
stat_ta_tss(os.path.join(self.tmps["ta"], ta), compare_file,
stat_out, self.tmps["ta_tss"],
self.tmps["tss_ta"], args_tss.fuzzy)
# The two temporary files (presumably written by stat_ta_tss — confirm) are
# sorted over the TSS file and into args_tss.ta_files, then deleted.
self.helper.sort_gff(self.tmps["tss_ta"], compare_file)
self.helper.sort_gff(self.tmps["ta_tss"],
os.path.join(args_tss.ta_files, ta))
os.remove(self.tmps["tss_ta"])
os.remove(self.tmps["ta_tss"])
# Reset the flag before the next prefix is searched.
detect = False
def _stat_tss(self, tsss, feature):
# Produce the statistics and the Venn plot for every prefix in ``tsss``,
# collecting the results in <stat_outfolder>/<tss>.
# NOTE(review): indentation was stripped from this listing; in particular it
# cannot be told here whether plot_venn and the last move_all_content sit
# inside the ``if`` below — confirm against upstream.
print("Running statistaics.....")
for tss in tsss:
# Input: <gff_outfolder>/<tss>_<feature>.gff.
compare_file = os.path.join(self.gff_outfolder,
"_".join([tss, feature]) + ".gff")
# The two CSV paths passed on are stat_<feature>_class_<tss>.csv and
# stat_<feature>_libs_<tss>.csv inside <stat_outfolder>/<tss>.
stat_tsspredator(
compare_file, feature,
os.path.join(self.stat_outfolder, tss, "_".join([
"stat", feature, "class", tss]) + ".csv"),
os.path.join(self.stat_outfolder, tss, "_".join([
"stat", feature, "libs", tss]) + ".csv"))
# Presumably moves the "_class" PNG files from the working directory into
# the strain folder — verify against Helper.move_all_content.
self.helper.move_all_content(os.getcwd(), os.path.join(
self.stat_outfolder, tss), ["_class", ".png"])
# A TSSstatistics.tsv found directly in stat_outfolder is moved into the
# strain folder.
if os.path.exists(os.path.join(
self.stat_outfolder, "TSSstatistics.tsv")):
shutil.move(
os.path.join(
self.stat_outfolder, "TSSstatistics.tsv"),
os.path.join(
self.stat_outfolder, tss, "TSSstatistics.tsv"))
plot_venn(compare_file, feature)
# Same move as above, this time for the "_venn" PNG files.
self.helper.move_all_content(os.getcwd(), os.path.join(
self.stat_outfolder, tss), ["_venn", ".png"])
def _set_gen_config(self, args_tss, input_folder):
prefixs = []
示例9: TranscriptAssembly
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import sort_gff [as 别名]
class TranscriptAssembly(object):
def __init__(self, args_tran):
    """Instantiate the helpers and work out the output and scratch paths.

    Every path is built from ``args_tran.out_folder``.
    """
    self.multiparser = Multiparser()
    self.helper = Helper()
    self.converter = Converter()
    self.gff_outfolder = os.path.join(args_tran.out_folder, "gffs")
    self.tran_path = os.path.join(self.gff_outfolder, "tmp")
    self.stat_path = os.path.join(args_tran.out_folder, "statistics")
    # Scratch names: two bare names first, then full paths.
    tmps = {"gff": "tmp.gff", "merge": "tmp_merge"}
    tmps["tran"] = os.path.join(args_tran.out_folder, "tmp_tran")
    tmps["tss_ta"] = os.path.join(self.gff_outfolder, "tmp_tss_ta")
    tmps["ta_tss"] = os.path.join(self.gff_outfolder, "tmp_ta_tss")
    tmps["ta_gff"] = os.path.join(self.gff_outfolder, "tmp_ta_gff")
    tmps["gff_ta"] = os.path.join(self.gff_outfolder, "tmp_gff_ta")
    tmps["uni"] = os.path.join(self.gff_outfolder, "tmp_uni")
    tmps["overlap"] = os.path.join(self.gff_outfolder, "tmp_overlap")
    self.tmps = tmps
    # File-name endings used for the different transcript GFFs.
    self.frag = "transcript_assembly_fragment.gff"
    self.tex = "transcript_assembly_tex_notex.gff"
    self.endfix_tran = "transcript.gff"
def _compute_transcript(self, wig_f, wig_r, wig_folder, wig_type, strain,
                        libs, args_tran):
    """Announce the strain and run ``assembly`` on its wiggle pair.

    Results are written under the prefix
    ``<args_tran.out_folder>/<strain>_<wig_type>``.
    """
    print("Computing transcript assembly for {0}...".format(strain))
    prefix = "_".join((strain, wig_type))
    target = os.path.join(args_tran.out_folder, prefix)
    assembly(wig_f, wig_r, wig_folder, libs, target, wig_type, args_tran)
def _compute(self, wig_type, wigs, libs, args_tran):
    """Assemble transcripts for each strain that has wiggles in ``<wigs>/tmp``.

    Strain names are the ``*_forward.wig`` file names with that ending
    removed; they are returned in directory-listing order.
    """
    wig_folder = os.path.join(wigs, "tmp")
    forward_wigs = filter(lambda name: name.endswith("_forward.wig"),
                          os.listdir(wig_folder))
    strains = [name.replace("_forward.wig", "") for name in forward_wigs]

    def wig_path(strain, direction):
        # Builds <wig_folder>/<strain>_<direction>.wig.
        return os.path.join(wig_folder, "_".join([strain, direction]))

    for strain in strains:
        self._compute_transcript(wig_path(strain, "forward.wig"),
                                 wig_path(strain, "reverse.wig"),
                                 wigs, wig_type, strain, libs, args_tran)
    return strains
def _compare_tss(self, tas, args_tran):
# Compare the transcript GFF of every prefix in ``tas`` with the TSS GFFs
# listed in ``<args_tran.compare_tss>/tmp`` by calling stat_ta_tss.
# NOTE(review): indentation was stripped from this listing, so the nesting
# described below is inferred from the statements — confirm against upstream.
self.multiparser.parser_gff(args_tran.compare_tss, "TSS")
self.multiparser.combine_gff(
self.gff_outfolder,
os.path.join(args_tran.compare_tss, "tmp"),
"transcript", "TSS")
print("Comaring of Transcript assembly and TSS file...")
tss_folder = os.path.join(args_tran.compare_tss, "tmp")
for ta in tas:
# Transcript file: <gff_outfolder>/<ta>_<endfix_tran>.
ta_file = os.path.join(self.gff_outfolder,
"_".join([ta, self.endfix_tran]))
# Statistics file:
# <stat_path>/stat_compare_Transcriptome_assembly_TSS_<ta>.csv.
stat_tss_out = os.path.join(
self.stat_path, "".join([
"stat_compare_Transcriptome_assembly_TSS_",
ta, ".csv"]))
for tss in os.listdir(tss_folder):
# A TSS file matches when the text before "_TSS" equals the prefix and the
# name ends with ".gff".
filename = tss.split("_TSS")
if (filename[0] == ta) and (tss.endswith(".gff")):
stat_ta_tss(ta_file, os.path.join(tss_folder, tss),
stat_tss_out, self.tmps["ta_tss"],
self.tmps["tss_ta"], args_tran.fuzzy)
# Both compared inputs are deleted; the two temporary files (presumably
# written by stat_ta_tss — confirm) are then sorted into their place.
os.remove(ta_file)
os.remove(os.path.join(tss_folder, tss))
self.helper.sort_gff(self.tmps["ta_tss"], ta_file)
# The sorted TSS file is written to args_tran.compare_tss itself, not to
# its "tmp" sub-folder.
self.helper.sort_gff(
self.tmps["tss_ta"], os.path.join(
args_tran.compare_tss, tss))
os.remove(self.tmps["tss_ta"])
os.remove(self.tmps["ta_tss"])
def _compare_cds(self, tas, args_tran):
# Compare the transcript GFF of every prefix in ``tas`` with the annotation
# GFF of the same name listed in ``<args_tran.compare_cds>/tmp`` by calling
# stat_ta_gff.
# NOTE(review): indentation was stripped from this listing, so the nesting
# described below is inferred from the statements — confirm against upstream.
self.multiparser.parser_gff(args_tran.compare_cds, None)
self.multiparser.combine_gff(
self.gff_outfolder, os.path.join(args_tran.compare_cds, "tmp"),
"transcript", None)
print("Comaring of Transcript assembly and gene...")
cds_folder = os.path.join(args_tran.compare_cds, "tmp")
for ta in tas:
# Transcript file: <gff_outfolder>/<ta>_<endfix_tran>.
ta_file = os.path.join(self.gff_outfolder,
"_".join([ta, self.endfix_tran]))
# Statistics file:
# <stat_path>/stat_compare_Transcriptome_assembly_gene_<ta>.csv.
stat_gff_out = os.path.join(self.stat_path, "".join([
"stat_compare_Transcriptome_assembly_gene_", ta, ".csv"]))
for gff in os.listdir(cds_folder):
# An annotation matches when its name is exactly "<ta>.gff".
if (gff[:-4] == ta) and (gff.endswith(".gff")):
cds_file = os.path.join(cds_folder, gff)
stat_ta_gff(ta_file, cds_file, stat_gff_out,
self.tmps["ta_gff"], self.tmps["gff_ta"],
args_tran.c_feature)
os.remove(ta_file)
# NOTE(review): the file was listed in <compare_cds>/tmp, but the copy that
# is deleted and rewritten lives in compare_cds itself — confirm intended.
os.remove(os.path.join(args_tran.compare_cds, gff))
# The two temporary files (presumably written by stat_ta_gff — confirm) are
# sorted into the places of the deleted inputs and then removed.
self.helper.sort_gff(self.tmps["ta_gff"], ta_file)
self.helper.sort_gff(self.tmps["gff_ta"], os.path.join(
args_tran.compare_cds, gff))
os.remove(self.tmps["ta_gff"])
os.remove(self.tmps["gff_ta"])
def _compare_tss_cds(self, tas, args_tran):
#.........这里部分代码省略.........