本文整理汇总了 Python 中 annogesiclib.helper.Helper.get_correct_file 方法的典型用法代码示例。如果您正苦于以下问题:Python Helper.get_correct_file 方法的具体用法?Python Helper.get_correct_file 怎么用?Python Helper.get_correct_file 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 annogesiclib.helper.Helper 的用法示例。
在下文中一共展示了Helper.get_correct_file方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: TestHelper
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class TestHelper(unittest.TestCase):
    '''Unit tests for the file and sequence utilities of Helper.'''

    def setUp(self):
        '''Create the scratch folder holding one GFF and one FASTA file.'''
        self.example = ExampleData()
        self.helper = Helper()
        self.gff_out = self.example.gff_out
        self.rev_seq = self.example.rev_seq.replace("\n", "")
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)
        self.gff_file = os.path.join(self.test_folder, "test.gff")
        self.seq_file = os.path.join(self.test_folder, "test.fa")
        fixtures = ((self.gff_file, self.example.gff_file),
                    (self.seq_file, self.example.seq))
        for path, content in fixtures:
            with open(path, "w") as handle:
                handle.write(content)

    def tearDown(self):
        '''Delete the scratch folder and everything inside it.'''
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_remove_all_content(self):
        '''"file" mode must only delete files, "dir" mode only folders.'''
        tmp_file = os.path.join(self.test_folder, "tmp1.gff")
        tmp_dir = os.path.join(self.test_folder, "tmp2")
        shutil.copyfile(self.gff_file, tmp_file)
        os.mkdir(tmp_dir)
        self.helper.remove_all_content(self.test_folder, "tmp", "file")
        self.assertFalse(os.path.exists(tmp_file))
        self.assertTrue(os.path.exists(tmp_dir))
        self.helper.remove_all_content(self.test_folder, "tmp", "dir")
        self.assertFalse(os.path.exists(tmp_dir))
        # The fixture does not match "tmp", so it has to survive both calls.
        self.assertTrue(os.path.exists(self.gff_file))

    def test_remove_tmp(self):
        '''remove_tmp must delete both "tmp" and "*_folder" directories.'''
        doomed = [os.path.join(self.test_folder, "tmp"),
                  os.path.join(self.test_folder, "test.gff_folder")]
        for folder in doomed:
            os.mkdir(folder)
        self.helper.remove_tmp(self.test_folder)
        for folder in doomed:
            self.assertFalse(os.path.exists(folder))

    def test_get_correct_file(self):
        '''The plain GFF must be picked although wiggle files sit beside it.'''
        gff_file = os.path.join(self.test_folder, "test.gff")
        wig_names = ["test_forward.wig_STRAIN_aaa.wig",
                     "test_reverse.wig_STRAIN_aaa.wig"]
        for wig_name in wig_names:
            shutil.copyfile(gff_file,
                            os.path.join(self.test_folder, wig_name))
        libs = ["test_forward.wig_STRAIN_aaa.wig:frag:1:a:+",
                "test_reverse.wig_STRAIN_aaa.wig:frag:1:a:-"]
        found = self.helper.get_correct_file(
            self.test_folder, ".gff", "test", None, libs)
        self.assertEqual(found, gff_file)

    def test_sorf_gff(self):
        '''sort_gff must emit exactly the lines of the expected GFF.'''
        out_file = os.path.join(self.test_folder, "test.out")
        self.helper.sort_gff(self.gff_file, out_file)
        produced = import_data(out_file)
        expected = self.gff_out.split("\n")
        self.assertEqual(set(produced), set(expected))

    def test_extract_gene(self):
        '''Check extraction on the forward and on the reverse strand.'''
        seq = self.example.seq.replace("\n", "")
        forward = self.helper.extract_gene(seq, 1, 70, "+")
        self.assertEqual(
            forward,
            "CGCAGGTTGAGTTCCTGTTCCCGATAGATCCGATAAACCCGCTTATGATTCCAGAGCTGTCCCTGCACAT")
        reverse = self.helper.extract_gene(seq, 1, 140, "-")
        self.assertEqual(reverse, self.rev_seq)

    def test_get_seq(self):
        '''get_seq must write one FASTA record for the single GFF entry.'''
        gff_file = os.path.join(self.test_folder, "test.gff")
        out_file = os.path.join(self.test_folder, "test.cds")
        # Overwrite the fixture with only the second line of the expected GFF.
        second_line = self.example.gff_out.split("\n")[1]
        with open(gff_file, "w") as handle:
            handle.write(second_line)
        self.helper.get_seq(self.gff_file, self.seq_file, out_file)
        produced = import_data(out_file)
        self.assertEqual(set(produced), {">cds0|aaa|1|10|+", "CGCAGGTTGA"})
示例2: Terminator
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class Terminator(object):
def __init__(self, args_term):
    '''Set up the collaborators and every path used by the run.

    args_term must provide gffs, fastas, trans, out_folder and srnas.
    The output folders are created at the end via _make_gff_folder().
    '''
    self.multiparser = Multiparser()
    self.helper = Helper()
    self.converter = Converter()
    self.gff_parser = Gff3Parser()
    self.gff_path = os.path.join(args_term.gffs, "tmp")
    self.fasta_path = os.path.join(args_term.fastas, "tmp")
    self.tran_path = os.path.join(args_term.trans, "tmp")
    gff_root = os.path.join(args_term.out_folder, "gffs")
    csv_root = os.path.join(args_term.out_folder, "tables")
    self.outfolder = {"term": gff_root, "csv": csv_root}
    # Both output trees share the same four sub-folders.
    subfolders = (("all", "all_candidates"), ("express", "express"),
                  ("best", "best"), ("non", "non_express"))
    self.terms = {key: os.path.join(gff_root, name)
                  for key, name in subfolders}
    self.csvs = {key: os.path.join(csv_root, name)
                 for key, name in subfolders}
    self.combine_path = os.path.join(self.gff_path, "combine")
    cwd = os.getcwd()
    self.tmps = {"transterm": os.path.join(cwd, "tmp_transterm"),
                 "hp": "transtermhp", "hp_gff": "transtermhp.gff",
                 "hp_path": "tmp_transterm/tmp",
                 "term_table": os.path.join(cwd, "tmp_term_table"),
                 "merge": os.path.join(cwd, "tmp_merge_gff"),
                 "gff": "tmp.gff",
                 "folder": os.path.join(cwd, "tmp")}
    self.suffixs = {"gff": "term.gff", "csv": "term.csv",
                    "allgff": "term_all.gff"}
    self.srna_path = (os.path.join(args_term.srnas, "tmp")
                      if args_term.srnas else None)
    self._make_gff_folder()
def _combine_annotation(self, combine_file, files):
    '''Concatenate the data rows of several annotation tables.

    For every path in files, each line that follows the first line
    containing "Location" is copied to combine_file. If the last line
    of an input has no newline, one is appended so that the next input
    starts on a fresh line.

    Each input is opened in a with-block, so its handle is released even
    if reading fails. An empty input is skipped instead of raising
    NameError or reusing the last line of the previous file.
    '''
    with open(combine_file, "w") as result:
        for file_ in files:
            header_seen = False
            last_line = None
            with open(file_, "r") as fh:
                for line in fh:
                    if header_seen:
                        result.write(line)
                    if "Location" in line:
                        header_seen = True
                    last_line = line
            # Only the final line of a file can lack its newline.
            if (last_line is not None) and ("\n" not in last_line):
                result.write("\n")
def _make_gff_folder(self):
    '''Run helper.check_make_folder on every GFF and table output folder.'''
    for key in ("all", "best", "express", "non"):
        self.helper.check_make_folder(self.terms[key])
        self.helper.check_make_folder(self.csvs[key])
def _convert_gff2rntptt(self, gff_path, fasta_path, sRNAs):
    '''Convert every *.gff in gff_path to .ptt/.rnt files.

    Returns (file_types, prefixs). file_types maps a prefix to "srna"
    when an sRNA GFF was included in the conversion and to "normal"
    otherwise. prefixs lists the prefixes in directory order. The
    process exits if no fasta file is found for a prefix.
    '''
    file_types = {}
    prefixs = []
    for gff in os.listdir(gff_path):
        if not gff.endswith(".gff"):
            continue
        prefix = gff.split("/")[-1][:-4]
        prefixs.append(prefix)
        gff_file = os.path.join(gff_path, gff)
        rnt_file = os.path.join(gff_path, gff.replace(".gff", ".rnt"))
        ptt_file = os.path.join(gff_path, gff.replace(".gff", ".ptt"))
        fasta = self.helper.get_correct_file(
            fasta_path, ".fa", prefix, None, None)
        if not fasta:
            print("Error: no proper file - {0}.fa".format(prefix))
            sys.exit()
        # fasta is guaranteed from here on; only the sRNA file is optional.
        srna = None
        if sRNAs:
            self.multiparser.parser_gff(sRNAs, "sRNA")
            srna = self.helper.get_correct_file(
                self.srna_path, "_sRNA.gff", prefix, None, None)
        if srna:
            self.converter.convert_gff2rntptt(
                gff_file, fasta, ptt_file, rnt_file, srna,
                srna.replace(".gff", ".rnt"))
            file_types[prefix] = "srna"
        else:
            self.converter.convert_gff2rntptt(
                gff_file, fasta, ptt_file, rnt_file, None, None)
            file_types[prefix] = "normal"
    return file_types, prefixs
def _combine_ptt_rnt(self, gff_path, file_types, srna_path):
#.........这里部分代码省略.........
示例3: RATT
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class RATT(object):
def __init__(self, args_ratt):
    '''Set up the collaborators and derive all working paths.

    args_ratt must provide ref_embls, output_path, tar_fastas,
    ref_fastas and gff_outfolder.
    '''
    self.multiparser = Multiparser()
    self.converter = Converter()
    self.format_fixer = FormatFixer()
    self.helper = Helper()
    embl_root = args_ratt.ref_embls
    self.gbk = os.path.join(embl_root, "gbk_tmp")
    self.gbk_tmp = os.path.join(self.gbk, "tmp")
    self.embl = os.path.join(embl_root, "embls")
    self.ratt_log = os.path.join(args_ratt.output_path, "ratt_log.txt")
    gff_out = args_ratt.gff_outfolder
    self.tmp_files = {
        "tar": os.path.join(args_ratt.tar_fastas, "tmp"),
        "ref": os.path.join(args_ratt.ref_fastas, "tmp"),
        "out_gff": os.path.join(gff_out, "tmp"),
        "gff": os.path.join(gff_out, "tmp.gff"),
        "ptt": os.path.join(gff_out, "tmp.ptt"),
        "rnt": os.path.join(gff_out, "tmp.rnt"),
    }
def _convert_to_pttrnt(self, gffs, files):
    '''Write .ptt/.rnt files next to each *.gff listed in files.

    A GFF is converted only when helper.get_correct_file returns a
    fasta file for its prefix from the target fasta folder.
    '''
    for name in files:
        if not name.endswith(".gff"):
            continue
        gff = os.path.join(gffs, name)
        prefix = gff.split("/")[-1][:-4]
        stem = gff[:-3]  # keeps the trailing dot of ".gff"
        rnt = stem + "rnt"
        ptt = stem + "ptt"
        fasta = self.helper.get_correct_file(
            self.tmp_files["tar"], ".fa", prefix, None, None)
        if fasta:
            self.converter.convert_gff2rntptt(
                gff, fasta, ptt, rnt, None, None)
def _remove_files(self, args_ratt, out_gbk):
    '''Replace the old results with the new ones and drop temp folders.

    Existing .gff/.ptt/.rnt files in gff_outfolder are removed, the
    content of the temporary output folder is moved there, and the
    working folders are deleted. out_gbk is deleted too when it is set.
    '''
    out_folder = args_ratt.gff_outfolder
    for suffix in (".gff", ".ptt", ".rnt"):
        self.helper.remove_all_content(out_folder, suffix, "file")
    self.helper.move_all_content(self.tmp_files["out_gff"], out_folder, None)
    for folder in (self.tmp_files["out_gff"], self.tmp_files["tar"],
                   self.tmp_files["ref"], self.embl):
        shutil.rmtree(folder)
    for fasta_folder in (args_ratt.tar_fastas, args_ratt.ref_fastas):
        self.helper.remove_all_content(fasta_folder, "_folder", "dir")
    if out_gbk:
        shutil.rmtree(out_gbk)
def _convert_to_gff(self, ratt_result, args_ratt, files):
    '''Turn one RATT result file into a GFF and register it in files.

    The GFF name is the result name without its first and its last two
    dot-separated parts. The GFF is written to output_path, passed
    through format_fixer.fix_ratt and copied to gff_outfolder. Its file
    name is appended to files.
    '''
    parts = ratt_result.split(".")
    strain = ".".join(parts[1:-2])
    filename = strain + ".gff"
    output_file = os.path.join(args_ratt.output_path, filename)
    embl_file = os.path.join(args_ratt.output_path, ratt_result)
    self.converter.convert_embl2gff(embl_file, output_file)
    # fix_ratt writes to a scratch file that then replaces the GFF.
    self.format_fixer.fix_ratt(output_file, strain, "tmp_gff")
    shutil.move("tmp_gff", output_file)
    shutil.copy(output_file,
                os.path.join(args_ratt.gff_outfolder, filename))
    files.append(filename)
def _parser_embl_gbk(self, files):
    '''Split GenBank files into one "<name>.gbk" file per record.

    A record starts at a line beginning with "LOCUS" and ends at a line
    beginning with "//". Its lines are written to self.gbk_tmp, which is
    moved into self.gbk when the record ends. The name comes from the
    LOCUS line and is replaced by the VERSION name unless the VERSION
    name starts with the LOCUS name. Returns self.gbk.

    Fixes over the previous implementation:
    - lines outside a record (before the first LOCUS or after "//") are
      skipped instead of raising NameError or writing to a closed file;
    - the output handle is closed on errors and when a new LOCUS
      arrives before the previous record was terminated;
    - header lines are split on any whitespace, so a name at the end of
      the line no longer drags its newline into the file name.
    '''
    self.helper.check_make_folder(self.gbk)
    for file_ in files:
        out = None
        filename = None
        try:
            with open(file_, "r") as f_h:
                for line in f_h:
                    if line.startswith("LOCUS"):
                        if out is not None:
                            out.close()
                        out = open(self.gbk_tmp, "w")
                        name = next((tok for tok in line.split()
                                     if tok != "LOCUS"), None)
                        if name is not None:
                            filename = ".".join([name, "gbk"])
                    elif line.startswith("VERSION"):
                        name = next((tok for tok in line.split()
                                     if tok != "VERSION"), None)
                        if name is not None:
                            new_filename = ".".join([name, "gbk"])
                            # Same truth table as the former
                            # "if new_filename.find(filename):".
                            # NOTE(review): confirm that a prefix test
                            # is what was intended here.
                            if (filename is None) or (
                                    not new_filename.startswith(filename)):
                                filename = new_filename
                    if out is not None:
                        out.write(line)
                        if line.startswith("//"):
                            out.close()
                            out = None
                            shutil.move(self.gbk_tmp,
                                        os.path.join(self.gbk, filename))
        finally:
            # An unterminated record stays in self.gbk_tmp, as before.
            if out is not None:
                out.close()
    return self.gbk
def _convert_embl(self, ref_embls):
detect_gbk = False
gbks = []
out_gbk = None
#.........这里部分代码省略.........
示例4: sRNADetection
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
#.........这里部分代码省略.........
if database_type == "sRNA":
change_format(database, "tmp_srna_database")
os.remove(database)
shutil.move("tmp_srna_database", database)
db_file = ".".join(database.split(".")[:-1])
self._run_format(blast_path, database, type_, db_file, err)
err.close()
def _merge_frag_tex_file(self, files, args_srna):
    '''Produce files["merge_gff"] and files["merge_csv"].

    With both library types the fragmented results are merged into the
    TEX ones first. With only one type its results are used directly.
    '''
    has_frag = args_srna.frag_wigs is not None
    has_tex = args_srna.tex_wigs is not None
    if has_frag and has_tex:
        self.helper.merge_file(files["frag_gff"], files["tex_gff"])
        self.helper.merge_file(files["frag_csv"], files["tex_csv"])
        shutil.move(files["tex_csv"], files["merge_csv"])
        self.helper.sort_gff(files["tex_gff"], files["merge_gff"])
        for leftover in ("frag_csv", "frag_gff", "tex_gff"):
            os.remove(files[leftover])
    elif has_frag:
        shutil.move(files["frag_csv"], files["merge_csv"])
        self.helper.sort_gff(files["frag_gff"], files["merge_gff"])
        os.remove(files["frag_gff"])
    elif has_tex:
        shutil.move(files["tex_csv"], files["merge_csv"])
        self.helper.sort_gff(files["tex_gff"], files["merge_gff"])
def _run_normal(self, prefix, gff, tran, fuzzy_tss, args_srna):
    '''Run intergenic_srna per library type, then merge the results.

    A stale "tmp_cutoff_inter" in the output folder is removed first.
    The TSS file is looked up only when "tss" is in import_info, and the
    processing-site file only when self.pro_path is set.
    '''
    out_folder = args_srna.out_folder
    if "tmp_cutoff_inter" in os.listdir(out_folder):
        os.remove(os.path.join(out_folder, "tmp_cutoff_inter"))
    files = {"frag_gff": None, "frag_csv": None,
             "tex_gff": None, "tex_csv": None,
             "merge_gff": None, "merge_csv": None}
    tss = None
    if "tss" in args_srna.import_info:
        tss = self.helper.get_correct_file(self.tss_path, "_TSS.gff",
                                           prefix, None, None)
    pro = None
    if self.pro_path is not None:
        pro = self.helper.get_correct_file(
            self.pro_path, "_processing.gff", prefix, None, None)
    runs = (("frag", "frag_wigs", "tmp_frag", "tmp_frag_table"),
            ("tex", "tex_wigs", "tmp_tex", "tmp_tex_table"))
    for lib, wig_attr, gff_tag, csv_tag in runs:
        # args_srna is rebound below, so it is read again on every pass.
        if getattr(args_srna, wig_attr) is None:
            continue
        files[lib + "_gff"] = os.path.join(
            args_srna.out_folder, "_".join([gff_tag, prefix]))
        files[lib + "_csv"] = os.path.join(
            args_srna.out_folder, "_".join([csv_tag, prefix]))
        args_srna = self.args_container.container_intersrna(
            lib, files, args_srna, prefix,
            os.path.join(args_srna.gffs, gff), tran, tss,
            pro, fuzzy_tss)
        intergenic_srna(args_srna)
    files["merge_csv"] = "_".join([self.prefixs["normal_table"], prefix])
    files["merge_gff"] = "_".join([self.prefixs["normal"], prefix])
    self._merge_frag_tex_file(files, args_srna)
示例5: OperonDetection
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class OperonDetection(object):
    '''Detection of operons from transcripts, genes, TSSs and terminators.

    TSS and terminator inputs are optional: args_op.tsss and
    args_op.terms may be None.
    '''

    def __init__(self, args_op):
        '''Derive the working paths from args_op.

        args_op must provide tsss, trans, output_folder and terms.
        Terminator GFFs are checked right away when they are given.
        '''
        self.multiparser = Multiparser()
        self.helper = Helper()
        if args_op.tsss is not None:
            self.tss_path = os.path.join(args_op.tsss, "tmp")
        else:
            self.tss_path = None
        self.tran_path = os.path.join(args_op.trans, "tmp")
        self.table_path = os.path.join(args_op.output_folder, "tables")
        if args_op.terms is not None:
            self._check_gff(args_op.terms, "term")
            self.term_path = os.path.join(args_op.terms, "tmp")
        else:
            self.term_path = None

    def _check_gff(self, gffs, type_):
        '''Run helper.check_uni_attributes on every *.gff in gffs.

        type_ is not used by the check itself.
        '''
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _detect_operon(self, prefixs, args_op, log):
        '''Call operon() once per prefix and log the generated files.

        False is passed for the TSS or terminator file when that input
        was not provided.
        '''
        log.write("Running detect_operon.py to detect operon.\n")
        log.write("The the following files are generated:\n")
        for prefix in prefixs:
            out_gff = os.path.join(args_op.output_folder, "gffs",
                                   "_".join([prefix, "operon.gff"]))
            out_table = os.path.join(self.table_path,
                                     "_".join([prefix, "operon.csv"]))
            print("Detecting operons of {0}".format(prefix))
            if self.tss_path is None:
                tss = False
            else:
                tss = self.helper.get_correct_file(
                    self.tss_path, "_TSS.gff", prefix, None, None)
            tran = self.helper.get_correct_file(
                self.tran_path, "_transcript.gff", prefix, None, None)
            gff = self.helper.get_correct_file(
                args_op.gffs, ".gff", prefix, None, None)
            if self.term_path is None:
                term = False
            else:
                term = self.helper.get_correct_file(
                    self.term_path, "_term.gff", prefix, None, None)
            operon(tran, tss, gff, term, args_op.tss_fuzzy,
                   args_op.term_fuzzy, args_op.length, out_table, out_gff)
            log.write("\t" + out_table + "\n")
            log.write("\t" + out_gff + "\n")

    def _check_and_parser_gff(self, args_op):
        '''Check all input GFFs, then parse and combine them.'''
        self._check_gff(args_op.gffs, "gff")
        self._check_gff(args_op.trans, "tran")
        self.multiparser.parser_gff(args_op.gffs, None)
        self.multiparser.parser_gff(args_op.trans, "transcript")
        self.multiparser.combine_gff(args_op.gffs, self.tran_path,
                                     None, "transcript")
        if args_op.tsss is not None:
            self._check_gff(args_op.tsss, "tss")
            self.multiparser.parser_gff(args_op.tsss, "TSS")
            self.multiparser.combine_gff(args_op.gffs, self.tss_path,
                                         None, "TSS")
        if args_op.terms is not None:
            self._check_gff(args_op.terms, "term")
            self.multiparser.parser_gff(args_op.terms, "term")
            self.multiparser.combine_gff(args_op.gffs, self.term_path,
                                         None, "term")

    def _stat(self, table_path, stat_folder, log):
        '''Call stat() on every "*_operon.csv" table in table_path.'''
        log.write("Running stat_operon.py to do statistics.\n")
        for table in os.listdir(table_path):
            if table.endswith("_operon.csv"):
                filename = "_".join(["stat", table])
                out_stat = os.path.join(stat_folder, filename)
                stat(os.path.join(table_path, table), out_stat)
                log.write("\t" + out_stat + "\n")

    def run_operon(self, args_op, log):
        '''Run parsing, detection and statistics, then clean up.'''
        self._check_and_parser_gff(args_op)
        prefixs = []
        for gff in os.listdir(args_op.gffs):
            if gff.endswith(".gff"):
                prefixs.append(gff.replace(".gff", ""))
        self._detect_operon(prefixs, args_op, log)
        self._stat(self.table_path, args_op.stat_folder, log)
        self.helper.remove_tmp_dir(args_op.gffs)
        # tsss is optional like terms, so it gets the same None guard.
        if args_op.tsss is not None:
            self.helper.remove_tmp_dir(args_op.tsss)
        self.helper.remove_tmp_dir(args_op.trans)
        if args_op.terms is not None:
            self.helper.remove_tmp_dir(args_op.terms)
示例6: OperonDetection
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class OperonDetection(object):
def __init__(self, args_op):
    '''Derive the working paths from args_op.

    args_op must provide tsss, trans, utr5s, utr3s, output_folder and
    terms. Terminator GFFs are checked right away when terms is given.
    '''
    self.multiparser = Multiparser()
    self.helper = Helper()
    self.tss_path = os.path.join(args_op.tsss, "tmp")
    self.tran_path = os.path.join(args_op.trans, "tmp")
    self.utr5_path = os.path.join(args_op.utr5s, "tmp")
    self.utr3_path = os.path.join(args_op.utr3s, "tmp")
    self.table_path = os.path.join(args_op.output_folder, "tables")
    if args_op.terms is None:
        self.term_path = None
    else:
        self._check_gff(args_op.terms, "term")
        self.term_path = os.path.join(args_op.terms, "tmp")
def _check_gff(self, gffs, type_):
    '''Run helper.check_uni_attributes on every *.gff in gffs.

    type_ is not used by the check itself.
    '''
    for name in os.listdir(gffs):
        if not name.endswith(".gff"):
            continue
        self.helper.check_uni_attributes(os.path.join(gffs, name))
def _detect_operon(self, prefixs, args_op):
    '''Call operon() once per prefix, writing "operon_<prefix>.csv".

    False is passed as terminator file when no terminators were given.
    '''
    find = self.helper.get_correct_file
    for prefix in prefixs:
        table_name = "_".join(["operon", prefix + ".csv"])
        out_table = os.path.join(self.table_path, table_name)
        print("Detection operons of {0}".format(prefix))
        tss = find(self.tss_path, "_TSS.gff", prefix, None, None)
        tran = find(self.tran_path, "_transcript.gff", prefix, None, None)
        gff = find(args_op.gffs, ".gff", prefix, None, None)
        term = False
        if self.term_path is not None:
            term = find(self.term_path, "_term.gff", prefix, None, None)
        operon(tran, tss, gff, term, args_op.tss_fuzzy,
               args_op.term_fuzzy, args_op.length, out_table)
def _check_and_parser_gff(self, args_op):
    '''Check all input GFFs, then parse and combine each feature type.

    Terminators are handled only when args_op.terms is not None.
    '''
    checks = ((args_op.tsss, "tss"), (args_op.gffs, "gff"),
              (args_op.trans, "tran"), (args_op.utr5s, "utr"),
              (args_op.utr3s, "utr"))
    for folder, label in checks:
        self._check_gff(folder, label)
    self.multiparser.parser_gff(args_op.gffs, None)
    features = [(args_op.tsss, self.tss_path, "TSS"),
                (args_op.trans, self.tran_path, "transcript"),
                (args_op.utr5s, self.utr5_path, "5UTR"),
                (args_op.utr3s, self.utr3_path, "3UTR")]
    for folder, tmp_path, feature in features:
        self.multiparser.parser_gff(folder, feature)
        self.multiparser.combine_gff(args_op.gffs, tmp_path, None, feature)
    if args_op.terms is not None:
        self._check_gff(args_op.terms, "term")
        self.multiparser.parser_gff(args_op.terms, "term")
        self.multiparser.combine_gff(args_op.gffs, self.term_path,
                                     None, "term")
def _stat(self, table_path, stat_folder):
    '''Call stat() on every "operon_*.csv" table in table_path.

    The result goes to "stat_<table name>" inside stat_folder.
    '''
    for table in os.listdir(table_path):
        is_operon_table = (table.startswith("operon_")
                           and table.endswith(".csv"))
        if not is_operon_table:
            continue
        out_stat = os.path.join(stat_folder, "_".join(["stat", table]))
        stat(os.path.join(table_path, table), out_stat)
def _combine_gff(self, prefixs, args_op):
    '''Call combine_gff() once per prefix.

    The output is "<prefix>_all_features.gff" in the "gffs" folder of
    output_folder. None is passed as terminator file when no
    terminators were given.
    '''
    find = self.helper.get_correct_file
    for prefix in prefixs:
        out_file = os.path.join(args_op.output_folder, "gffs",
                                "_".join([prefix, "all_features.gff"]))
        print("Combine all features of {0}".format(prefix))
        tss = find(self.tss_path, "_TSS.gff", prefix, None, None)
        tran = find(self.tran_path, "_transcript.gff", prefix, None, None)
        gff = find(args_op.gffs, ".gff", prefix, None, None)
        utr5 = find(self.utr5_path, "_5UTR.gff", prefix, None, None)
        utr3 = find(self.utr3_path, "_3UTR.gff", prefix, None, None)
        term = None
        if self.term_path is not None:
            term = find(self.term_path, "_term.gff", prefix, None, None)
        combine_gff(gff, tran, tss, utr5, utr3, term,
                    args_op.tss_fuzzy, args_op.term_fuzzy, out_file)
def run_operon(self, args_op):
self._check_and_parser_gff(args_op)
prefixs = []
for gff in os.listdir(args_op.gffs):
if gff.endswith(".gff"):
prefixs.append(gff.replace(".gff", ""))
#.........这里部分代码省略.........
示例7: SubLocal
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class SubLocal(object):
def __init__(self, args_sub):
    '''Derive all working paths and create the output folders.

    args_sub must provide gffs, fastas, trans and out_folder.
    self.tran_path is None when no transcripts were given.
    '''
    self.multiparser = Multiparser()
    self.helper = Helper()
    self.fixer = FormatFixer()
    self.gff_path = os.path.join(args_sub.gffs, "tmp")
    self.fasta_path = os.path.join(args_sub.fastas, "tmp")
    self.tran_path = (os.path.join(args_sub.trans, "tmp")
                      if args_sub.trans is not None else None)
    # Results for all CDSs.
    self.out_all = os.path.join(args_sub.out_folder, "all_CDS")
    self.all_tmp_path = os.path.join(self.out_all, "tmp")
    self.all_stat_path = os.path.join(self.out_all, "statistics")
    self.all_tmp_result = os.path.join(self.out_all, "tmp_results")
    self.all_result = os.path.join(self.out_all, "psortb_results")
    # Results for expressed CDSs, laid out the same way.
    self.out_express = os.path.join(args_sub.out_folder, "expressed_CDS")
    self.express_tmp_path = os.path.join(self.out_express, "tmp")
    self.express_stat_path = os.path.join(self.out_express, "statistics")
    self.express_tmp_result = os.path.join(self.out_express, "tmp_results")
    self.express_result = os.path.join(self.out_express, "psortb_results")
    self.endfix_table = "table.csv"
    self.endfix_raw = "raw.txt"
    self._make_folder()
def _make_folder(self):
    '''Run helper.check_make_folder on all output folders.'''
    folders = (self.out_all, self.out_express,
               self.all_stat_path, self.express_stat_path,
               self.all_result, self.express_result)
    for folder in folders:
        self.helper.check_make_folder(folder)
def _compare_cds_tran(self, gff_file, tran_file):
    '''Write the CDSs that lie on a transcript to "tmp_cds.gff".

    A CDS is written once, on the first transcript of the same strand
    and sequence id that matches one of four cases: the transcript
    covers the CDS end, covers the CDS start, lies inside the CDS, or
    contains the CDS. The file is created in self.out_all.

    All three files are opened in one with-block, so the handles are
    released even when parsing raises.
    '''
    out_path = os.path.join(self.out_all, "tmp_cds.gff")
    with open(out_path, "w") as out, \
            open(gff_file) as fh, open(tran_file) as th:
        cdss = [entry for entry in Gff3Parser().entries(fh)
                if entry.feature == "CDS"]
        trans = list(Gff3Parser().entries(th))
        for cds in cdss:
            for ta in trans:
                if (cds.strand != ta.strand) or (cds.seq_id != ta.seq_id):
                    continue
                covers_end = ((cds.end < ta.end) and
                              (cds.end > ta.start) and
                              (cds.start <= ta.start))
                covers_start = ((cds.start > ta.start) and
                                (cds.start < ta.end) and
                                (cds.end >= ta.end))
                tran_inside = ((cds.end >= ta.end) and
                               (cds.start <= ta.start))
                cds_inside = ((cds.end <= ta.end) and
                              (cds.start >= ta.start))
                if covers_end or covers_start or tran_inside or cds_inside:
                    out.write(cds.info + "\n")
                    # One hit is enough; avoids duplicate CDS lines.
                    break
def _get_protein_seq(self, gff, tmp_path, tran_path):
    '''Create the DNA and protein fasta files of the CDSs of one GFF.

    With tran_path set, only the CDSs selected by _compare_cds_tran are
    used; otherwise all CDSs of the GFF are. The files are written to
    tmp_path as "<prefix>_dna.fa" and "<prefix>_protein.fa". A scratch
    file named "tmp" is created in the working directory and removed
    again. Returns the prefix.
    '''
    prefix = gff.replace(".gff", "")
    fasta = self.helper.get_correct_file(self.fasta_path, ".fa",
                                         prefix, None, None)
    dna_seq_file = os.path.join(tmp_path, "_".join([prefix, "dna.fa"]))
    print("Generate CDS fasta files of {0}".format(prefix))
    if tran_path is None:
        self.helper.get_cds_seq(os.path.join(self.gff_path, gff),
                                fasta, dna_seq_file)
    else:
        tmp_cds = os.path.join(self.out_all, "tmp_cds.gff")
        tran_file = os.path.join(
            tran_path, "_".join([prefix, "transcript.gff"]))
        self._compare_cds_tran(os.path.join(self.gff_path, gff), tran_file)
        self.helper.get_cds_seq(tmp_cds, fasta, dna_seq_file)
        os.remove(tmp_cds)
    print("transfer DNA seq to protein seq of {0}".format(prefix))
    self.helper.translation(dna_seq_file, "tmp")
    prot_seq_file = os.path.join(
        tmp_path, "_".join([prefix, "protein.fa"]))
    self.fixer.fix_emboss("tmp", prot_seq_file)
    os.remove("tmp")
    return prefix
def _psortb(self, psortb_path, strain_type, prot_seq_file,
            out_raw, out_err):
    '''Run psortb_path on prot_seq_file with the strain_type argument.

    stdout goes to out_raw and stderr to out_err.
    '''
    command = [psortb_path, strain_type, prot_seq_file]
    call(command, stdout=out_raw, stderr=out_err)
def _run_psortb(self, args_sub, prefix, out_folder, tmp_path, tmp_result):
print("Running psortb of {0}".format(prefix))
out_err = open(os.path.join(out_folder, "tmp_log"), "w")
out_raw = open(os.path.join(tmp_result,
"_".join([prefix, self.endfix_raw])), "w")
prot_seq_file = os.path.join(tmp_path,
#.........这里部分代码省略.........
示例8: Terminator
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class Terminator(object):
'''detection of terminator'''
def __init__(self, args_term):
    '''Set up the collaborators and every path used by the run.

    args_term must provide gffs, fastas, trans, out_folder and srnas.
    The output folders are created at the end via _make_gff_folder().
    '''
    self.multiparser = Multiparser()
    self.helper = Helper()
    self.converter = Converter()
    self.gff_parser = Gff3Parser()
    self.gff_path = os.path.join(args_term.gffs, "tmp")
    self.fasta_path = os.path.join(args_term.fastas, "tmp")
    self.tran_path = os.path.join(args_term.trans, "tmp")
    gff_root = os.path.join(args_term.out_folder, "gffs")
    csv_root = os.path.join(args_term.out_folder, "tables")
    self.outfolder = {"term": gff_root, "csv": csv_root}
    # Both output trees share the same four sub-folders.
    subfolders = (("all", "all_candidates"),
                  ("express", "expressed_candidates"),
                  ("best", "best_candidates"),
                  ("non", "non_expressed_candidates"))
    self.terms = {key: os.path.join(gff_root, name)
                  for key, name in subfolders}
    self.csvs = {key: os.path.join(csv_root, name)
                 for key, name in subfolders}
    self.combine_path = os.path.join(self.gff_path, "combine")
    cwd = os.getcwd()
    self.tmps = {"transterm": os.path.join(cwd, "tmp_transterm"),
                 "hp": "transtermhp", "hp_gff": "transtermhp.gff",
                 "hp_path": "tmp_transterm/tmp",
                 "term_table": os.path.join(cwd, "tmp_term_table"),
                 "merge": os.path.join(cwd, "tmp_merge_gff"),
                 "gff": "tmp.gff",
                 "folder": os.path.join(cwd, "tmp")}
    self.suffixs = {"gff": "term.gff", "csv": "term.csv",
                    "allgff": "term_all.gff"}
    self.srna_path = (os.path.join(args_term.srnas, "tmp")
                      if args_term.srnas else None)
    self._make_gff_folder()
def _combine_annotation(self, combine_file, files):
    '''Concatenate the data rows of several annotation tables.

    For every existing, non-empty path in files, each line that follows
    the first line containing "Location" is copied to combine_file. A
    newline is appended when the last line of an input lacks one.

    Returns "NO_CDS" as soon as an empty ".ptt" file is met (a warning
    is printed), otherwise "Normal". Each input is opened in a
    with-block, so its handle is released even if reading fails.
    '''
    with open(combine_file, 'w') as result:
        for file_ in files:
            if (file_.endswith(".ptt")) and (os.stat(file_).st_size == 0):
                print("Warning: No CDS information, "
                      "TransTermHP can not work!")
                return "NO_CDS"
            if os.path.exists(file_) and (
                    os.stat(file_).st_size != 0):
                header_seen = False
                last_line = ""
                with open(file_, 'r') as fh:
                    for line in fh:
                        if header_seen:
                            result.write(line)
                        if "Location" in line:
                            header_seen = True
                        last_line = line
                # Only the final line of a file can lack its newline.
                if "\n" not in last_line:
                    result.write("\n")
    return "Normal"
def _make_gff_folder(self):
    '''Run helper.check_make_folder on every GFF and table output folder.'''
    for key in ("all", "best", "express", "non"):
        self.helper.check_make_folder(self.terms[key])
        self.helper.check_make_folder(self.csvs[key])
def _convert_gff2rntptt(self, gff_path, fasta_path, sRNAs, log):
file_types = {}
prefixs = []
for gff in os.listdir(gff_path):
if gff.endswith(".gff"):
filename = gff.split("/")
prefix = filename[-1][:-4]
prefixs.append(prefix)
gff_file = os.path.join(gff_path, gff)
rnt_file = os.path.join(gff_path, gff.replace(".gff", ".rnt"))
ptt_file = os.path.join(gff_path, gff.replace(".gff", ".ptt"))
fasta = self.helper.get_correct_file(
fasta_path, ".fa", prefix, None, None)
if not fasta:
log.write("{0}.fa can not be found.\n".format(prefix))
print("Error: {0}.fa can not be found!".format(prefix))
sys.exit()
if sRNAs:
self.multiparser.parser_gff(sRNAs, "sRNA")
srna = self.helper.get_correct_file(
self.srna_path, "_sRNA.gff", prefix, None, None)
if (srna) and (fasta):
log.write("Running converter.py to convert {0} and "
"{1} to {2}, {3}, and {4}.\n".format(
#.........这里部分代码省略.........
示例9: SubLocal
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class SubLocal(object):
'''detection of subcellular localization'''
def __init__(self, args_sub):
    '''Derive all working paths and create the output folders.

    args_sub must provide gffs, fastas, trans and out_folder.
    self.tran_path is None when no transcripts were given.
    '''
    self.multiparser = Multiparser()
    self.helper = Helper()
    self.fixer = FormatFixer()
    self.gff_path = os.path.join(args_sub.gffs, "tmp")
    self.fasta_path = os.path.join(args_sub.fastas, "tmp")
    self.tran_path = (os.path.join(args_sub.trans, "tmp")
                      if args_sub.trans is not None else None)
    # Results for all CDSs.
    self.out_all = os.path.join(args_sub.out_folder, "all_CDSs")
    self.all_tmp_path = os.path.join(self.out_all, "tmp")
    self.all_stat_path = os.path.join(self.out_all, "statistics")
    self.all_tmp_result = os.path.join(self.out_all, "tmp_results")
    self.all_result = os.path.join(self.out_all, "psortb_results")
    # Results for expressed CDSs, laid out the same way.
    self.out_express = os.path.join(args_sub.out_folder, "expressed_CDSs")
    self.express_tmp_path = os.path.join(self.out_express, "tmp")
    self.express_stat_path = os.path.join(self.out_express, "statistics")
    self.express_tmp_result = os.path.join(self.out_express, "tmp_results")
    self.express_result = os.path.join(self.out_express, "psortb_results")
    self.endfix_table = "table.csv"
    self.endfix_raw = "raw.txt"
    self._make_folder()
def _make_folder(self):
    '''Run helper.check_make_folder on all output folders.'''
    folders = (self.out_all, self.out_express,
               self.all_stat_path, self.express_stat_path,
               self.all_result, self.express_result)
    for folder in folders:
        self.helper.check_make_folder(folder)
def _compare_cds_tran(self, gff_file, tran_file, log):
    '''compare CDS and transcript to find the expressed CDS

    A CDS is written to "tmp_cds.gff" in self.out_all once, on the
    first transcript of the same strand and sequence id that matches
    one of four cases: the transcript covers the CDS end, covers the
    CDS start, lies inside the CDS, or contains the CDS. Progress is
    reported through log.write.

    All three files are opened in one with-block, so the handles are
    released even when parsing raises.
    '''
    log.write("Comparing transcripts and CDSs to get expressed CDSs.\n")
    out_path = os.path.join(self.out_all, "tmp_cds.gff")
    with open(out_path, "w") as out, \
            open(gff_file) as fh, open(tran_file) as th:
        cdss = [entry for entry in Gff3Parser().entries(fh)
                if entry.feature == "CDS"]
        trans = list(Gff3Parser().entries(th))
        for cds in cdss:
            for ta in trans:
                if (cds.strand != ta.strand) or (cds.seq_id != ta.seq_id):
                    continue
                covers_end = ((cds.end < ta.end) and
                              (cds.end > ta.start) and
                              (cds.start <= ta.start))
                covers_start = ((cds.start > ta.start) and
                                (cds.start < ta.end) and
                                (cds.end >= ta.end))
                tran_inside = ((cds.end >= ta.end) and
                               (cds.start <= ta.start))
                cds_inside = ((cds.end <= ta.end) and
                              (cds.start >= ta.start))
                if covers_end or covers_start or tran_inside or cds_inside:
                    out.write(cds.info + "\n")
                    # One hit is enough; avoids duplicate CDS lines.
                    break
    log.write("\t" + os.path.join(self.out_all, "tmp_cds.gff") + " is "
              "temporary generated.\n")
def _get_protein_seq(self, gff, tmp_path, tran_path, args_sub, log):
prefix = gff.replace(".gff", "")
fasta = self.helper.get_correct_file(self.fasta_path, ".fa",
prefix, None, None)
dna_seq_file = os.path.join(tmp_path, "_".join([prefix, "dna.fa"]))
print("Generating CDS fasta files of {0}".format(prefix))
if tran_path is not None:
log.write("Predicting subcellular localization for expressed "
"CDSs for {0}.\n".format(prefix))
self._compare_cds_tran(os.path.join(self.gff_path, gff),
os.path.join(tran_path, "_".join([
prefix, "transcript.gff"])), log)
log.write("Running helper.py to extract sequences for CDSs.\n")
self.helper.get_cds_seq(os.path.join(self.out_all, "tmp_cds.gff"),
fasta, dna_seq_file)
os.remove(os.path.join(self.out_all, "tmp_cds.gff"))
else:
log.write("Predicting subcellular localization for all CDSs for "
"{0}.\n".format(prefix))
log.write("Running helper.py to extract sequences for CDSs.\n")
self.helper.get_cds_seq(os.path.join(self.gff_path, gff),
fasta, dna_seq_file)
log.write("\t" + dna_seq_file + " is generated.\n")
print("Transfering DNA sequences to protein sequence of {0}".format(
prefix))
log.write("Running helper.py to translate DNA sequences to Protein "
"sequences.\n")
tmp_file = os.path.join(args_sub.out_folder, "tmp")
self.helper.translation(dna_seq_file, tmp_file)
prot_seq_file = os.path.join(
#.........这里部分代码省略.........
示例10: RATT
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class RATT(object):
'''annotation transfer'''
def __init__(self, args_ratt):
    '''Set up the collaborators and derive all working paths.

    The GenBank paths (self.gbk, self.gbk_tmp) are only set when
    args_ratt.ref_gbk is given. self.embl points into ref_gbk unless
    args_ratt.ref_embls is given, which takes precedence.
    '''
    self.multiparser = Multiparser()
    self.converter = Converter()
    self.format_fixer = FormatFixer()
    self.helper = Helper()
    gbk_root = args_ratt.ref_gbk
    if gbk_root:
        self.gbk = os.path.join(gbk_root, "gbk_tmp")
        self.gbk_tmp = os.path.join(self.gbk, "tmp")
        self.embl = os.path.join(gbk_root, "embls")
    if args_ratt.ref_embls:
        self.embl = args_ratt.ref_embls
    self.ratt_log = os.path.join(args_ratt.output_path, "ratt_log.txt")
    gff_out = args_ratt.gff_outfolder
    self.tmp_files = {
        "tar": os.path.join(args_ratt.tar_fastas, "tmp"),
        "ref": os.path.join(args_ratt.ref_fastas, "tmp"),
        "out_gff": os.path.join(gff_out, "tmp"),
        "gff": os.path.join(gff_out, "tmp.gff"),
        "ptt": os.path.join(gff_out, "tmp.ptt"),
        "rnt": os.path.join(gff_out, "tmp.rnt"),
    }
def _convert_to_pttrnt(self, gffs, files, log):
    """Write a .ptt and a .rnt file next to every .gff file in ``files``.

    ``files`` holds file names relative to the folder ``gffs``. A file is
    converted only when ``helper.get_correct_file`` returns a truthy value
    for its prefix (presumably the matching target FASTA -- confirm
    against Helper). Each generated path is reported to ``log``.
    """
    for entry in files:
        if not entry.endswith(".gff"):
            continue
        gff_path = os.path.join(gffs, entry)
        # File name without directory part and without the ".gff" suffix.
        prefix = gff_path.rsplit("/", 1)[-1][:-4]
        stem = gff_path[:-3]
        rnt = stem + "rnt"
        ptt = stem + "ptt"
        fasta = self.helper.get_correct_file(self.tmp_files["tar"],
                                             ".fa", prefix, None, None)
        if not fasta:
            continue
        self.converter.convert_gff2rntptt(gff_path, fasta, ptt, rnt,
                                          None, None)
        for generated in (ptt, rnt):
            log.write("\t" + generated + " is generated.\n")
def _remove_files(self, args_ratt, out_gbk, log):
    """Publish the final outputs and delete the intermediate data.

    Clears existing .gff/.ptt/.rnt files from ``args_ratt.gff_outfolder``,
    moves the content of the temporary output folder there, then removes
    the temporary folders. ``out_gbk`` is accepted but not used here.
    """
    for suffix in (".gff", ".ptt", ".rnt"):
        self.helper.remove_all_content(args_ratt.gff_outfolder,
                                       suffix, "file")
    log.write("Moving the final output files to {0}.\n".format(
        args_ratt.gff_outfolder))
    self.helper.move_all_content(self.tmp_files["out_gff"],
                                 args_ratt.gff_outfolder, None)
    log.write("Remove the temperary files.\n")
    for key in ("out_gff", "tar", "ref"):
        shutil.rmtree(self.tmp_files[key])
    # Attributes are read one at a time, in the original call order.
    for attr in ("tar_fastas", "ref_fastas", "ref_embls", "ref_gbk"):
        self.helper.remove_tmp_dir(getattr(args_ratt, attr))
def _convert_to_gff(self, ratt_result, args_ratt, files, log):
    """Convert one RATT EMBL result into a GFF file and register it.

    The output name is ``ratt_result`` without its first and its last two
    dot-separated fields, plus ".gff". The GFF is written to
    ``args_ratt.output_path``, passed through ``format_fixer.fix_ratt``
    (via the scratch file "tmp_gff" in the working directory), copied to
    ``args_ratt.gff_outfolder`` and its name appended to ``files``.
    """
    fields = ratt_result.split(".")
    base = ".".join(fields[1:-2])
    filename = base + ".gff"
    output_file = os.path.join(args_ratt.output_path, filename)
    embl_file = os.path.join(args_ratt.output_path, ratt_result)
    self.converter.convert_embl2gff(embl_file, output_file)
    self.format_fixer.fix_ratt(output_file, base, "tmp_gff")
    # The fixed copy replaces the raw conversion result.
    shutil.move("tmp_gff", output_file)
    final_path = os.path.join(args_ratt.gff_outfolder, filename)
    shutil.copy(output_file, final_path)
    log.write("\t" + final_path + " is generated.\n")
    files.append(filename)
def _parser_embl_gbk(self, files):
self.helper.check_make_folder(self.gbk)
for file_ in files:
close = False
with open(file_, "r") as f_h:
for line in f_h:
if (line.startswith("LOCUS")):
out = open(self.gbk_tmp, "w")
datas = line.split(" ")
for data in datas:
if (len(data) != 0) and (data != "LOCUS"):
filename = ".".join([data.strip(), "gbk"])
break
elif (line.startswith("VERSION")):
datas = line.split(" ")
for data in datas:
if (len(data) != 0) and (data != "VERSION"):
new_filename = ".".join([data.strip(), "gbk"])
break
if new_filename.find(filename):
filename = new_filename
if out:
out.write(line)
if line.startswith("//"):
out.close()
close = True
shutil.move(self.gbk_tmp,
#.........这里部分代码省略.........
示例11: TSSpredator
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
#.........这里部分代码省略.........
tmp_tss = os.path.join(self.tmps["tmp"], "_".join([
prefix, args_tss.program + ".gff"]))
pre_tss = os.path.join(self.gff_outfolder, "_".join([
prefix, args_tss.program + ".gff"]))
check_orphan(pre_tss, os.path.join(
args_tss.gffs, prefix + ".gff"),
"tmp/merge_forward.wig", "tmp/merge_reverse.wig", tmp_tss)
shutil.move(tmp_tss, pre_tss)
shutil.rmtree("tmp")
def _remove_files(self, args_tss):
    """Delete the temporary data left behind by a run.

    Cleans the input folders through ``helper.remove_tmp_dir``, removes
    the merged wiggle files from the current working directory if they
    are present, and deletes ``args_tss.wig_folder`` and, when given,
    ``args_tss.manual``.
    """
    print("Remove temperary files and folders")
    for attr in ("fastas", "gffs", "ta_files"):
        self.helper.remove_tmp_dir(getattr(args_tss, attr))
    for wig_name in ("merge_forward.wig", "merge_reverse.wig"):
        if wig_name in os.listdir(os.getcwd()):
            os.remove(wig_name)
    shutil.rmtree(args_tss.wig_folder)
    manual = args_tss.manual
    if manual is not None:
        shutil.rmtree(manual)
def _deal_with_overlap(self, out_folder, args_tss):
    """Handle TSSs and processing sites that share a position.

    Does nothing when ``args_tss.overlap_feature`` is falsy or when
    ``args_tss.program`` is neither "tss" nor "processing" (compared
    case-insensitively). Otherwise every result file of the current
    program in ``out_folder`` is passed to ``filter_tss_pro`` together
    with the file of the other feature type looked up in
    ``args_tss.overlap_gffs``.
    """
    if not args_tss.overlap_feature:
        return
    print("Comparing TSSs and Processing sites")
    # program -> (suffix of own result files, suffix of the reference files)
    suffixes = {"tss": ("_TSS.gff", "_processing.gff"),
                "processing": ("_processing.gff", "_TSS.gff")}
    pair = suffixes.get(args_tss.program.lower())
    if pair is None:
        return
    own_suffix, ref_suffix = pair
    for entry in os.listdir(out_folder):
        if not entry.endswith(own_suffix):
            continue
        ref = self.helper.get_correct_file(
            args_tss.overlap_gffs, ref_suffix,
            entry.replace(own_suffix, ""), None, None)
        filter_tss_pro(os.path.join(out_folder, entry),
                       ref, args_tss.program,
                       args_tss.cluster)
def _low_expression(self, args_tss, gff_folder):
'''deal with the low expressed TSS'''
prefix = None
self._merge_wigs(args_tss.wig_folder, "wig", args_tss.libs)
for gff in os.listdir(gff_folder):
if (args_tss.program.lower() == "tss") and (
gff.endswith("_TSS.gff")):
prefix = gff.replace("_TSS.gff", "")
elif (args_tss.program.lower() == "processing") and (
gff.endswith("_processing.gff")):
prefix = gff.replace("_processing.gff", "")
if prefix:
out = open(os.path.join(
self.stat_outfolder, prefix, "_".join([
"stat", prefix, "low_expression_cutoff.csv"])), "w")
out.write("\t".join(["Genome", "Cutoff_coverage"]) + "\n")
cutoff = filter_low_expression(
示例12: UTRDetection
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
class UTRDetection(object):
    """Run 5'UTR and 3'UTR detection for every annotation GFF file.

    The annotation, TSS and transcript folders are required; the
    terminator folder (``args_utr.terms``) is optional.
    """

    def __init__(self, args_utr):
        """Create the helpers and derive the working/output paths."""
        self.helper = Helper()
        self.multiparser = Multiparser()
        self.tss_path = os.path.join(args_utr.tsss, "tmp")
        self.tran_path = os.path.join(args_utr.trans, "tmp")
        self.utr5_path = os.path.join(args_utr.out_folder, "5UTR")
        self.utr3_path = os.path.join(args_utr.out_folder, "3UTR")
        self.utr5_stat_path = os.path.join(self.utr5_path, "statistics")
        self.utr3_stat_path = os.path.join(self.utr3_path, "statistics")

    def _check_folder(self, folder):
        """Abort the program when a required input folder is missing.

        Raises:
            SystemExit: with status 1 when ``folder`` is None.
        """
        if folder is None:
            print("Error: lack required files!!!")
            # A bare sys.exit() would report success (status 0) to the
            # calling shell although the run is being aborted.
            sys.exit(1)

    def _check_gff(self, folder):
        """Run ``helper.check_uni_attributes`` on each .gff file in folder.

        ``folder`` may be None (the optional terminator folder), in which
        case nothing is checked.
        """
        if folder is None:
            # os.listdir(None) would silently list the current working
            # directory and os.path.join(None, ...) would then raise
            # TypeError for any .gff file found there.
            return
        for gff in os.listdir(folder):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(folder, gff))

    def _compute_utr(self, args_utr):
        """Call detect_5utr and detect_3utr for every annotation GFF file."""
        for gff in os.listdir(args_utr.gffs):
            if not gff.endswith(".gff"):
                continue
            prefix = gff[:-4]
            gff_file = os.path.join(args_utr.gffs, gff)
            tss = self.helper.get_correct_file(
                self.tss_path, "_TSS.gff", prefix, None, None)
            tran = self.helper.get_correct_file(
                self.tran_path, "_transcript.gff", prefix, None, None)
            if args_utr.terms:
                term = self.helper.get_correct_file(
                    os.path.join(args_utr.terms, "tmp"),
                    "_term.gff", prefix, None, None)
            else:
                # Terminators are optional; detect_3utr receives None.
                term = None
            print("computing 5'UTR of {0} .....".format(prefix))
            detect_5utr(tss, gff_file, tran,
                        os.path.join(self.utr5_path, "gffs",
                                     "_".join([prefix, "5UTR.gff"])),
                        args_utr)
            print("computing 3'UTR of {0} .....".format(prefix))
            detect_3utr(tran, gff_file, term,
                        os.path.join(self.utr3_path, "gffs",
                                     "_".join([prefix, "3UTR.gff"])),
                        args_utr)
            # Collect the length plots from the working directory.
            self.helper.move_all_content(
                os.getcwd(), self.utr5_stat_path, ["_5utr_length.png"])
            self.helper.move_all_content(
                os.getcwd(), self.utr3_stat_path, ["_3utr_length.png"])

    def run_utr_detection(self, args_utr):
        """Validate the inputs, run the detection and clean up.

        Raises:
            SystemExit: when the TSS, annotation or transcript folder is
                None.
        """
        self._check_folder(args_utr.tsss)
        self._check_folder(args_utr.gffs)
        self._check_folder(args_utr.trans)
        self._check_gff(args_utr.tsss)
        self._check_gff(args_utr.gffs)
        self._check_gff(args_utr.trans)
        # terms may be None; _check_gff skips it in that case.
        self._check_gff(args_utr.terms)
        self.multiparser.parser_gff(args_utr.gffs, None)
        self.multiparser.parser_gff(args_utr.tsss, "TSS")
        self.multiparser.combine_gff(args_utr.gffs, self.tss_path, None, "TSS")
        self.multiparser.parser_gff(args_utr.trans, "transcript")
        self.multiparser.combine_gff(args_utr.gffs, self.tran_path,
                                     None, "transcript")
        if args_utr.terms:
            self.multiparser.parser_gff(args_utr.terms, "term")
            self.multiparser.combine_gff(args_utr.gffs,
                                         os.path.join(args_utr.terms, "tmp"),
                                         None, "term")
        self._compute_utr(args_utr)
        self.helper.remove_tmp(args_utr.gffs)
        self.helper.remove_tmp(args_utr.tsss)
        self.helper.remove_tmp(args_utr.trans)
        self.helper.remove_tmp(args_utr.terms)
        self.helper.remove_tmp(self.utr5_path)
        self.helper.remove_tmp(self.utr3_path)
示例13: TSSpredator
# 需要导入模块: from annogesiclib.helper import Helper [as 别名]
# 或者: from annogesiclib.helper.Helper import get_correct_file [as 别名]
#.........这里部分代码省略.........
def _check_orphan(self, prefixs, wig_folder, args_tss):
"""Run check_orphan on the predicted sites of every prefix.

For each prefix the wiggle files are merged with ``_merge_wigs``,
``check_orphan`` is called with the existing prediction, the annotation
GFF and the merged wiggle files, and the file it writes to the temporary
path is moved over the existing prediction.
"""
# NOTE(review): indentation was lost in this listing; which of the
# statements below belong to the loop body (in particular the final
# rmtree) must be confirmed against the upstream source.
for prefix in prefixs:
# presumably produces the tmp/merge_*.wig files read below -- confirm
self._merge_wigs(wig_folder, prefix, args_tss.libs)
# Scratch output path: <tmp folder>/<prefix>_<program>.gff
tmp_tss = os.path.join(self.tmps["tmp"], "_".join([
prefix, args_tss.program + ".gff"]))
# Existing prediction: <gff_outfolder>/<prefix>_<program>.gff
pre_tss = os.path.join(self.gff_outfolder, "_".join([
prefix, args_tss.program + ".gff"]))
check_orphan(pre_tss, os.path.join(
args_tss.gffs, prefix + ".gff"),
"tmp/merge_forward.wig", "tmp/merge_reverse.wig", tmp_tss)
# Replace the existing prediction with the file at the scratch path.
shutil.move(tmp_tss, pre_tss)
# Removes the relative "tmp" folder of the current working directory.
shutil.rmtree("tmp")
def _remove_files(self, args_tss):
    """Delete the temporary data left behind by a run.

    Cleans the input folders through ``helper.remove_tmp`` and removes
    the merged wiggle files from the current working directory if they
    are present.
    """
    print("Remove temperary files and folders...")
    for attr in ("fastas", "gffs", "wig_folder", "ta_files"):
        self.helper.remove_tmp(getattr(args_tss, attr))
    for wig_name in ("merge_forward.wig", "merge_reverse.wig"):
        if wig_name in os.listdir(os.getcwd()):
            os.remove(wig_name)
def _deal_with_overlap(self, out_folder, args_tss):
    """Filter TSSs and processing sites that overlap each other.

    Nothing is done when ``args_tss.overlap_feature`` is "both"
    (case-insensitive). Otherwise every result file of the current
    program in ``out_folder`` is passed to ``filter_tss_pro`` together
    with the file of the other feature type looked up in
    ``args_tss.references``.

    The processing-site mode is accepted as "processing" as well as
    "processing_site": the sibling methods build and match the result
    files as ``<prefix>_<program>.gff`` with program "processing", so
    testing for "processing_site" alone never matched a
    ``_processing.gff`` file.
    """
    if args_tss.overlap_feature.lower() == "both":
        return
    print("Comparing TSS and Processing site...")
    # program -> (suffix of own result files, suffix of the reference files)
    processing_pair = ("_processing.gff", "_TSS.gff")
    suffixes = {"tss": ("_TSS.gff", "_processing.gff"),
                "processing": processing_pair,
                "processing_site": processing_pair}
    pair = suffixes.get(args_tss.program.lower())
    if pair is None:
        return
    own_suffix, ref_suffix = pair
    for entry in os.listdir(out_folder):
        if not entry.endswith(own_suffix):
            continue
        ref = self.helper.get_correct_file(
            args_tss.references, ref_suffix,
            entry.replace(own_suffix, ""), None, None)
        filter_tss_pro(os.path.join(out_folder, entry),
                       ref, args_tss.overlap_feature,
                       args_tss.cluster)
def _low_expression(self, args_tss, gff_folder):
prefix = None
self._merge_wigs(args_tss.wig_folder, "wig", args_tss.libs)
for gff in os.listdir(gff_folder):
if (args_tss.program.lower() == "tss") and (
gff.endswith("_TSS.gff")):
prefix = gff.replace("_TSS.gff", "")
elif (args_tss.program.lower() == "processing") and (
gff.endswith("_processing.gff")):
prefix = gff.replace("_processing.gff", "")
if prefix:
out = open(os.path.join(
self.stat_outfolder, prefix, "_".join([
"stat", prefix, "low_expression_cutoff.csv"])), "w")
out.write("\t".join(["strain", "cutoff_coverage"]) + "\n")
cutoff = filter_low_expression(
os.path.join(gff_folder, gff), args_tss,