本文整理汇总了Python中annogesiclib.multiparser.Multiparser.combine_fasta方法的典型用法代码示例。如果您正苦于以下问题:Python Multiparser.combine_fasta方法的具体用法?Python Multiparser.combine_fasta怎么用?Python Multiparser.combine_fasta使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类annogesiclib.multiparser.Multiparser的用法示例。
在下文中一共展示了Multiparser.combine_fasta方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: sORFDetection
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_fasta [as 别名]
#.........这里部分代码省略.........
os.path.join(args_sorf.wig_path,
"_".join([prefix, "forward.wig"])),
os.path.join(args_sorf.wig_path,
"_".join([prefix, "reverse.wig"])),
os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF"])), args_sorf)
if "_".join([prefix, "sORF_all.gff"]) in os.listdir(
os.path.join(self.gff_output, self.all_cand)):
gff_all = os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF.gff"]))
gff_best = os.path.join(self.gff_output, self.best,
"_".join([prefix, "sORF.gff"]))
csv_all = os.path.join(self.table_output, self.all_cand,
"_".join([prefix, "sORF.csv"]))
csv_best = os.path.join(self.table_output, self.best,
"_".join([prefix, "sORF.csv"]))
shutil.move(os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF_all.gff"])), gff_all)
shutil.move(os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF_best.gff"])), gff_best)
shutil.move(os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF_all.csv"])), csv_all)
shutil.move(os.path.join(self.gff_output, self.all_cand,
"_".join([prefix, "sORF_best.csv"])), csv_best)
log.write("\t" + gff_all + "\n")
log.write("\t" + gff_best + "\n")
log.write("\t" + csv_all + "\n")
log.write("\t" + csv_best + "\n")
def _remove_tmp(self, args_sorf):
    '''clean up after the sORF detection

    Asks the helper to clear the ".gff" content of the output folder and
    to run remove_tmp_dir on every input folder, then deletes the
    "temp_wig" and "merge_wigs" folders of the output folder when they
    are present.
    '''
    out_dir = args_sorf.out_folder
    self.helper.remove_all_content(out_dir, ".gff", "file")
    # attributes are looked up one by one, right before each helper call
    for attr in ("fastas", "gffs", "tsss", "trans", "srnas"):
        self.helper.remove_tmp_dir(getattr(args_sorf, attr))
    for leftover in ("temp_wig", "merge_wigs"):
        if leftover in os.listdir(out_dir):
            shutil.rmtree(os.path.join(out_dir, leftover))
def _compare_tran_cds(self, args_sorf, log):
    '''compare transcript and CDS to find the intergenic region

    Every "*.gff" file in args_sorf.gffs is passed to get_intergenic
    together with the matching "<prefix>_transcript.gff" of
    self.tran_path; the result path is "<prefix>_inter.gff" in the
    output folder. Returns the list of prefixes that were handled.
    '''
    log.write("Running sORF_intergenic.py to extract the sequences of "
              "potential sORFs\n")
    handled = []
    for name in os.listdir(args_sorf.gffs):
        if not name.endswith(".gff"):
            continue
        prefix = name.replace(".gff", "")
        handled.append(prefix)
        print("Comparing transcripts and CDSs of {0}".format(prefix))
        annotation = os.path.join(args_sorf.gffs, name)
        transcript = os.path.join(
            self.tran_path, "_".join([prefix, "transcript.gff"]))
        inter_gff = os.path.join(
            args_sorf.out_folder, "_".join([prefix, "inter.gff"]))
        get_intergenic(annotation, transcript, inter_gff,
                       args_sorf.utr_detect, args_sorf.hypo,
                       args_sorf.extend_5, args_sorf.extend_3)
        log.write("\t" + inter_gff +
                  " is generated to temporary store the sequences.\n")
    return handled
def _re_table(self, args_sorf, prefixs, log):
    '''call reorganize_table on every sORF table

    For both the "all_candidates" and the "best_candidates" folder
    under <out_folder>/tables, the "<prefix>_sORF.csv" table of each
    prefix is handed to reorganize_table and its path is logged.
    '''
    log.write("Running re_table.py for generating coverage information.\n")
    log.write("The following files are updated:\n")
    for candidate_dir in ("all_candidates", "best_candidates"):
        for prefix in prefixs:
            csv_name = "_".join([prefix, "sORF.csv"])
            csv_path = os.path.join(
                args_sorf.out_folder, "tables", candidate_dir, csv_name)
            reorganize_table(args_sorf.libs, args_sorf.merge_wigs,
                             "Track_detail", csv_path)
            log.write("\t" + csv_path + "\n")
def run_sorf_detection(self, args_sorf, log):
    '''run the whole sORF detection

    Steps, in order: validate --fuzzy_rbs, check the input files, parse
    and combine the transcript GFFs and the fasta files, compare
    transcripts with CDSs, search start/stop codons, run the statistics
    on every "*_sORF.gff" of the all-candidates folder, regenerate the
    tables and remove the temporary files.

    args_sorf: argument container of the sORF subcommand.
    log: writable log file handle.
    Exits with status 1 when args_sorf.fuzzy_rbs is larger than 6.
    '''
    if args_sorf.fuzzy_rbs > 6:
        log.write("--fuzzy_rbs should be equal or less than 6!\n")
        print("Error: --fuzzy_rbs should be equal or less than 6!")
        # non-zero status so that calling scripts can detect the failure
        sys.exit(1)
    self._check_necessary_files(args_sorf, log)
    self.multiparser.parser_gff(args_sorf.trans, "transcript")
    self.multiparser.combine_gff(args_sorf.gffs, self.tran_path,
                                 None, "transcript")
    self.multiparser.parser_fasta(args_sorf.fastas)
    self.multiparser.combine_fasta(args_sorf.gffs, self.fasta_path, None)
    prefixs = self._compare_tran_cds(args_sorf, log)
    self._start_stop_codon(prefixs, args_sorf, log)
    log.write("Running stat_sorf.py to do statistics.\n")
    all_dir = os.path.join(self.gff_output, self.all_cand)
    for sorf in os.listdir(all_dir):
        # only the sORF GFF files are analysed, so only those are announced
        if not sorf.endswith("_sORF.gff"):
            continue
        print("Running statistics of {0}".format(sorf))
        stat_file = os.path.join(
            args_sorf.out_folder, "statistics",
            "_".join(["stat", sorf.replace(".gff", ".csv")]))
        stat(os.path.join(all_dir, sorf),
             os.path.join(self.gff_output, self.best, sorf), stat_file,
             args_sorf.utr_detect)
        log.write("\t" + stat_file + " is generated.\n")
    self._re_table(args_sorf, prefixs, log)
    self._remove_tmp(args_sorf)
示例2: TestMultiparser
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_fasta [as 别名]
class TestMultiparser(unittest.TestCase):
def setUp(self):
    '''create the parser, the example data and both working folders'''
    self.multiparser = Multiparser()
    self.example = Example()
    # attribute name and folder name are the same string for both folders
    for folder in ("ref_folder", "tar_folder"):
        setattr(self, folder, folder)
        if not os.path.exists(folder):
            os.mkdir(folder)
def tearDown(self):
    '''delete the reference and the target folder when they exist'''
    for folder in (self.ref_folder, self.tar_folder):
        if os.path.exists(folder):
            shutil.rmtree(folder)
def test_combine_fasta(self):
    '''combine_fasta has to create "test.fa" in the target folder

    The two fasta pieces are written to <tar_folder>/tmp and the two
    GFF pieces to <ref_folder>/test.gff_folder before the call.
    '''
    tmp_tar = os.path.join(self.tar_folder, "tmp")
    tmp_ref = os.path.join(self.ref_folder, "test.gff_folder")
    os.mkdir(tmp_ref)
    os.mkdir(tmp_tar)
    # (folder, file name, attribute of the example data holding the text)
    fixtures = [(tmp_tar, "aaa.fa", "sub_fasta1"),
                (tmp_tar, "bbb.fa", "sub_fasta2"),
                (tmp_ref, "aaa.gff", "sub_gff1"),
                (tmp_ref, "bbb.gff", "sub_gff2")]
    for folder, name, attr in fixtures:
        with open(os.path.join(folder, name), "w") as handle:
            handle.write(getattr(self.example, attr))
    self.multiparser.combine_fasta(self.ref_folder, tmp_tar, None)
    merged = os.path.join(tmp_tar, "test.fa")
    self.assertTrue(os.path.exists(merged))
def test_combine_wig(self):
    '''combine_wig has to create the forward and the reverse wig file

    The two fasta pieces are written to <ref_folder>/test.fa_folder and
    the four per-strain wig files to <tar_folder>/tmp before the call.
    '''
    tmp_tar = os.path.join(self.tar_folder, "tmp")
    tmp_ref = os.path.join(self.ref_folder, "test.fa_folder")
    os.mkdir(tmp_ref)
    os.mkdir(tmp_tar)
    for name, attr in (("aaa.fa", "sub_fasta1"), ("bbb.fa", "sub_fasta2")):
        with open(os.path.join(tmp_ref, name), "w") as handle:
            handle.write(getattr(self.example, attr))
    wig_names = ["test_forward.wig_STRAIN_aaa.wig",
                 "test_forward.wig_STRAIN_bbb.wig",
                 "test_reverse.wig_STRAIN_aaa.wig",
                 "test_reverse.wig_STRAIN_bbb.wig"]
    wig_paths = [os.path.join(tmp_tar, name) for name in wig_names]
    wig_texts = [self.example.sub_f_wig1, self.example.sub_f_wig2,
                 self.example.sub_r_wig1, self.example.sub_r_wig2]
    for path, text in zip(wig_paths, wig_texts):
        with open(path, "w") as handle:
            handle.write(text)
    libs = ["test_forward.wig_STRAIN_aaa.wig:frag:1:a:+",
            "test_reverse.wig_STRAIN_aaa.wig:frag:1:a:-"]
    self.multiparser.combine_wig(self.ref_folder, tmp_tar, "fasta", libs)
    for merged in ("test_forward.wig", "test_reverse.wig"):
        self.assertTrue(os.path.exists(os.path.join(tmp_tar, merged)))
def test_combine_gff(self):
    '''combine_gff has to create "test.gff" in the target folder

    The two fasta pieces are written to <ref_folder>/test.fa_folder and
    the two GFF pieces to <tar_folder>/tmp before the call.
    '''
    tmp_tar = os.path.join(self.tar_folder, "tmp")
    tmp_ref = os.path.join(self.ref_folder, "test.fa_folder")
    os.mkdir(tmp_ref)
    os.mkdir(tmp_tar)
    # (folder, file name, attribute of the example data holding the text)
    fixtures = [(tmp_ref, "aaa.fa", "sub_fasta1"),
                (tmp_ref, "bbb.fa", "sub_fasta2"),
                (tmp_tar, "aaa.gff", "sub_gff1"),
                (tmp_tar, "bbb.gff", "sub_gff2")]
    for folder, name, attr in fixtures:
        with open(os.path.join(folder, name), "w") as handle:
            handle.write(getattr(self.example, attr))
    self.multiparser.combine_gff(self.ref_folder, tmp_tar, "fasta", None)
    merged = os.path.join(tmp_tar, "test.gff")
    self.assertTrue(os.path.exists(merged))
def test_parser_fasta(self):
    '''parser_fasta has to write the per-strain fasta files

    After parsing <ref_folder>/test.fa the files "aaa.fa" and "bbb.fa"
    are expected in both the "tmp" and the "test.fa_folder" subfolder.
    '''
    source = os.path.join(self.ref_folder, "test.fa")
    with open(source, "w") as handle:
        handle.write(self.example.fasta_file)
    self.multiparser.parser_fasta(self.ref_folder)
    expected = ("tmp/aaa.fa", "tmp/bbb.fa",
                "test.fa_folder/aaa.fa", "test.fa_folder/bbb.fa")
    for relative in expected:
        produced = os.path.join(self.ref_folder, relative)
        self.assertTrue(os.path.exists(produced))
def test_parser_gff(self):
gff_file = os.path.join(self.ref_folder, "test.gff")
with open(gff_file, "w") as rh:
rh.write(self.example.gff_file)
self.multiparser.parser_gff(self.ref_folder, None)
self.assertTrue(os.path.exists(os.path.join(self.ref_folder, "tmp/aaa.gff")))
#.........这里部分代码省略.........
示例3: sRNADetection
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_fasta [as 别名]
#.........这里部分代码省略.........
if (args_srna.tss_folder is None):
print("Error: lack required TSS files for UTR "
"derived sRNA detection!!!!")
sys.exit()
if (args_srna.pro_folder is None):
print("Warning: lack Processing site files for UTR "
"derived sRNA detection!!!")
print("it may effect the results!!!!")
self._check_gff(args_srna.gffs)
self._check_gff(args_srna.trans)
if args_srna.tss_folder is not None:
self._check_gff(args_srna.tss_folder)
self.multiparser.parser_gff(args_srna.tss_folder, "TSS")
self.multiparser.combine_gff(args_srna.gffs, self.tss_path,
None, "TSS")
if args_srna.pro_folder is not None:
self._check_gff(args_srna.pro_folder)
self.multiparser.parser_gff(args_srna.pro_folder, "processing")
self.multiparser.combine_gff(args_srna.gffs, self.pro_path,
None, "processing")
if args_srna.sorf_file is not None:
self._check_gff(args_srna.sorf_file)
self.multiparser.parser_gff(args_srna.sorf_file, "sORF")
self.multiparser.combine_gff(args_srna.gffs, self.sorf_path,
None, "sORF")
if args_srna.utr_srna or ("sec_str" in args_srna.import_info) or (
"blast_nr" in args_srna.import_info) or (
"blast_srna" in args_srna.import_info):
if args_srna.fastas is None:
print("Error: lack required fasta files for UTR "
"derived sRNA detection!!!!")
sys.exit()
self.multiparser.parser_fasta(args_srna.fastas)
self.multiparser.combine_fasta(args_srna.gffs,
self.fasta_path, None)
if args_srna.terms is not None:
self._check_gff(args_srna.terms)
self.multiparser.parser_gff(args_srna.terms, "term")
self.multiparser.combine_gff(args_srna.gffs, self.term_path,
None, "term")
else:
self.term_path = None
def _run_program(self, args_srna):
prefixs = []
tss = None
for gff in os.listdir(args_srna.gffs):
if gff.endswith(".gff"):
prefix = gff.replace(".gff", "")
prefixs.append(prefix)
print("Running sRNA detection of {0}....".format(prefix))
tran = self.helper.get_correct_file(
self.tran_path, "_transcript.gff", prefix, None, None)
gffs = {"merge": "_".join([self.prefixs["merge"], prefix]),
"utr": "_".join([self.prefixs["utr"], prefix]),
"normal": "_".join([self.prefixs["normal"], prefix])}
csvs = {"merge": "_".join([
self.prefixs["merge_table"], prefix]),
"utr": "_".join([self.prefixs["utr_table"], prefix]),
"normal": "_".join([
self.prefixs["normal_table"], prefix])}
tss = self._run_normal(
prefix, gff, tran, args_srna.fuzzy_tsss["inter"],
args_srna)
if args_srna.utr_srna:
print("Running UTR derived sRNA detection of {0}".format(
示例4: TSSpredator
# 需要导入模块: from annogesiclib.multiparser import Multiparser [as 别名]
# 或者: from annogesiclib.multiparser.Multiparser import combine_fasta [as 别名]
#.........这里部分代码省略.........
if tss.endswith("_processing.gff"):
ref = self.helper.get_correct_file(
args_tss.overlap_gffs, "_TSS.gff",
tss.replace("_processing.gff", ""), None, None)
filter_tss_pro(os.path.join(out_folder, tss),
ref, args_tss.program,
args_tss.cluster)
def _low_expression(self, args_tss, gff_folder):
    '''deal with the low expressed TSS

    Merges the wig files first. Then every GFF file of gff_folder whose
    suffix fits args_tss.program ("_TSS.gff" for "tss",
    "_processing.gff" for "processing") is passed to
    filter_low_expression. The returned cutoff is written to
    <stat_outfolder>/<prefix>/stat_<prefix>_low_expression_cutoff.csv
    and the GFF file is replaced by the filtered one.

    args_tss: argument container of the TSS subcommand.
    gff_folder: folder which stores the GFF files to filter.
    '''
    self._merge_wigs(args_tss.wig_folder, "wig", args_tss.libs)
    suffixes = {"tss": "_TSS.gff", "processing": "_processing.gff"}
    suffix = suffixes.get(args_tss.program.lower())
    filtered_gff = "tmp/without_low_expression.gff"
    for gff in os.listdir(gff_folder):
        if suffix is None or not gff.endswith(suffix):
            continue
        prefix = gff.replace(suffix, "")
        if not prefix:
            # a file that consists of the suffix only has no genome name
            continue
        gff_path = os.path.join(gff_folder, gff)
        stat_file = os.path.join(
            self.stat_outfolder, prefix,
            "_".join(["stat", prefix, "low_expression_cutoff.csv"]))
        # one handle per genome, closed even if the filtering fails
        with open(stat_file, "w") as out:
            out.write("\t".join(["Genome", "Cutoff_coverage"]) + "\n")
            cutoff = filter_low_expression(
                gff_path, args_tss,
                "tmp/merge_forward.wig", "tmp/merge_reverse.wig",
                filtered_gff)
            out.write("\t".join([prefix, str(cutoff)]) + "\n")
        os.remove(gff_path)
        shutil.move(filtered_gff, gff_path)
def run_tsspredator(self, args_tss, log):
    '''run the whole TSS / processing site prediction

    Steps, in order: check the attributes of the input GFF files, parse
    fasta/GFF/wig files, generate the config files and run the
    predictor for every prefix, convert the results to GFF, optionally
    re-check orphan TSSs, combine the GFF files, then run the optional
    low expression filter and manual merge, followed by overlap
    handling, statistics, optional validation and transcript
    comparison, and finally the removal of the temporary files.

    args_tss: argument container of the TSS subcommand; its "program"
        attribute is rewritten from "ps" to "processing".
    log: writable log file handle.
    '''
    config_dir = os.path.join(args_tss.out_folder, "configs")
    for annotation in os.listdir(args_tss.gffs):
        if annotation.endswith(".gff"):
            self.helper.check_uni_attributes(
                os.path.join(args_tss.gffs, annotation))
    self.helper.check_make_folder(self.gff_outfolder)
    self.multiparser.parser_fasta(args_tss.fastas)
    self.multiparser.parser_gff(args_tss.gffs, None)
    self.multiparser.parser_wig(args_tss.wig_folder)
    prefixs = self._set_gen_config(args_tss, config_dir, log)
    for prefix in prefixs:
        master_dir = os.path.join(
            self.master, "_".join(["MasterTable", prefix]))
        ini_file = os.path.join(
            config_dir, "_".join(["config", prefix]) + ".ini")
        self._start_to_run(args_tss.tsspredator_path, ini_file,
                           master_dir, prefix, log)
        stat_tsv = os.path.join(master_dir, "TSSstatistics.tsv")
        if os.path.exists(stat_tsv):
            shutil.move(stat_tsv, os.path.join(
                self.stat_outfolder, "TSSstatistics.tsv"))
    # "ps" is only an alias; the steps below read args_tss.program
    if args_tss.program.lower() == "ps":
        args_tss.program = "processing"
    self._convert_gff(prefixs, args_tss, log)
    if args_tss.check_orphan:
        print("checking the orphan TSSs")
        log.write("Running check_orphan.py to re-check orphan TSSs.\n")
        wig_tmp = os.path.join(args_tss.wig_folder, "tmp")
        self._check_orphan(prefixs, wig_tmp, args_tss)
    self.multiparser.combine_gff(args_tss.gffs, self.gff_outfolder,
                                 None, args_tss.program)
    genomes = []
    for result in os.listdir(self.gff_outfolder):
        if not result.endswith(".gff"):
            continue
        genome = result.replace(
            "".join(["_", args_tss.program, ".gff"]), "")
        self.helper.check_make_folder(
            os.path.join(self.stat_outfolder, genome))
        genomes.append(genome)
    if args_tss.remove_low_expression is not None:
        log.write("Running filter_low_expression.py to filter out "
                  "low expressed TSS/PS.\n")
        self._low_expression(args_tss, self.gff_outfolder)
    if args_tss.manual is not None:
        self.multiparser.parser_gff(args_tss.manual, None)
        self.multiparser.combine_gff(args_tss.gffs, self.manual_path,
                                     None, None)
        self.multiparser.combine_fasta(args_tss.gffs, self.fasta_path,
                                       None)
        self.multiparser.combine_wig(args_tss.gffs, self.wig_path,
                                     None, args_tss.libs)
        log.write("Running merge_manual.py to merge the manual TSSs.\n")
        self._merge_manual(genomes, args_tss)
    log.write("Running filter_TSS_pro.py to deal with the overlap "
              "position between TSS and PS.\n")
    self._deal_with_overlap(self.gff_outfolder, args_tss)
    log.write("Running stat_TSSpredator.py to do statistics.\n")
    self._stat_tss(genomes, args_tss.program, log)
    if args_tss.validate:
        self._validate(genomes, args_tss, log)
    if args_tss.ta_files is not None:
        self._compare_ta(genomes, args_tss, log)
    self._remove_files(args_tss)