本文整理汇总了 Python 中 quast_libs.qutils.label_from_fpath 函数的典型用法代码示例。如果您正苦于以下问题：Python label_from_fpath 函数的具体用法？Python label_from_fpath 怎么用？Python label_from_fpath 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
下文共展示了 label_from_fpath 函数的 15 个代码示例，这些示例默认按受欢迎程度排序。您可以为喜欢或觉得有用的代码点赞，您的评价将有助于系统推荐出更好的 Python 代码示例。
示例1: save_features_in_contigs
def save_features_in_contigs(output_dirpath, contigs_fpaths, feature_name, features_in_contigs, ref_features_num):
    """Save per-assembly feature amounts for one feature type.

    Builds a record holding the assembly labels derived from
    ``contigs_fpaths``, the ``features_in_contigs`` mapping re-keyed by
    assembly label, and ``ref_features_num``, then hands it to ``save``
    together with the target path inside ``output_dirpath``.

    Returns whatever ``save`` returns.
    """
    target_fpath = join(output_dirpath, feature_name + in_contigs_suffix_fn)
    assembly_labels = [qutils.label_from_fpath(fpath) for fpath in contigs_fpaths]

    # Re-key the amounts from assembly file path to assembly label.
    amounts_by_label = {}
    for fpath, feature_amounts in features_in_contigs.items():
        amounts_by_label[qutils.label_from_fpath(fpath)] = feature_amounts

    record = {
        'filenames': assembly_labels,
        feature_name + '_in_contigs': amounts_by_label,
        'ref_' + feature_name + '_number': ref_features_num,
    }
    return save(target_fpath, record)
示例2: draw_coverage_histograms
def draw_coverage_histograms(coverage_dict, contigs_fpaths, output_dirpath):
    """Draw a combined coverage histogram plus one histogram per assembly.

    Only assemblies with a truthy entry in ``coverage_dict`` are plotted.
    The combined plot is written to ``<output_dirpath>/coverage_histogram``;
    each per-assembly plot goes to
    ``<output_dirpath>/<file-name label>_coverage_histogram``.
    Binning parameters come from ``binning_coverage`` and the drawing is
    delegated to ``plotter.coverage_histogram``.
    """
    total_len = {}
    contigs_dict = {}
    contigs_with_coverage = [fpath for fpath in contigs_fpaths if coverage_dict[fpath]]
    for fpath in contigs_fpaths:
        total_len[fpath] = reporting.get(fpath).get_field(reporting.Fields.TOTALLEN)
        contigs_dict[fpath] = reporting.get(fpath).get_field(reporting.Fields.CONTIGS)

    # Combined histogram over every assembly that has coverage data.
    cov_values = [coverage_dict[fpath] for fpath in contigs_with_coverage]
    num_contigs = [contigs_dict[fpath] for fpath in contigs_with_coverage]
    binned = binning_coverage(cov_values, num_contigs)
    common_coverage_values, bin_size, low_threshold, high_threshold, max_cov = binned
    plotter.coverage_histogram(
        contigs_with_coverage, common_coverage_values, output_dirpath + '/coverage_histogram',
        'Coverage histogram (bin size: ' + str(bin_size) + 'x)',
        bin_size=bin_size, max_cov=max_cov,
        low_threshold=low_threshold, high_threshold=high_threshold)

    # One histogram per assembly, binned independently of the others.
    for fpath in contigs_with_coverage:
        binned = binning_coverage([coverage_dict[fpath]], [contigs_dict[fpath]])
        coverage_values, bin_size, low_threshold, high_threshold, max_cov = binned
        label = qutils.label_from_fpath(fpath)
        corr_label = qutils.label_from_fpath_for_fname(fpath)
        histogram_title = label + ' coverage histogram (bin size: ' + str(bin_size) + 'x)'
        histogram_fpath = os.path.join(output_dirpath, corr_label + '_coverage_histogram')
        plotter.coverage_histogram(
            [fpath], coverage_values, histogram_fpath, histogram_title,
            draw_bars=True, bin_size=bin_size, max_cov=max_cov,
            low_threshold=low_threshold, high_threshold=high_threshold)
示例3: save_contigs_lengths
def save_contigs_lengths(output_dirpath, contigs_fpaths, lists_of_lengths):
    """Save the contig lengths of every assembly, longest first.

    Each list in ``lists_of_lengths`` is sorted in descending order (the
    input lists are not modified) and stored next to the assembly labels
    derived from ``contigs_fpaths``.

    Returns whatever ``save`` returns.
    """
    descending_lengths = []
    for lengths in lists_of_lengths:
        descending_lengths.append(sorted(lengths, reverse=True))

    target_fpath = join(output_dirpath, contigs_lengths_fn)
    record = {
        'filenames': [qutils.label_from_fpath(fpath) for fpath in contigs_fpaths],
        'lists_of_lengths': descending_lengths,
    }
    return save(target_fpath, record)
示例4: save_coord
def save_coord(output_dirpath, coord_x, coord_y, name_coord, contigs_fpaths):
    """Save plot coordinates together with the assembly labels.

    The record is handed to ``save`` with the target path
    ``<output_dirpath>/<name_coord + suffix_fn>``.

    Returns whatever ``save`` returns.
    """
    target_fpath = join(output_dirpath, name_coord + suffix_fn)
    record = {
        'coord_x': coord_x,
        'coord_y': coord_y,
        'filenames': [qutils.label_from_fpath(fpath) for fpath in contigs_fpaths],
    }
    return save(target_fpath, record)
示例5: get_assemblies_data
def get_assemblies_data(contigs_fpaths, icarus_dirpath, stdout_pattern, nx_marks):
    """Build the JavaScript snippets describing each assembly.

    Returns a 3-tuple:
      * JS declarations of the ``assemblies_*`` objects followed by one
        ``assemblies_links`` entry per assembly (only when
        ``stdout_pattern`` is truthy),
      * JS assignments with total length, contig count and N50 of every
        assembly,
      * a ``defaultdict`` mapping assembly label -> {nx field: value} for
        every field in ``nx_marks``.
    """
    assemblies_n50 = defaultdict(dict)
    data_chunks = [
        'var assemblies_links = {};\n',
        'var assemblies_len = {};\n',
        'var assemblies_contigs = {};\n',
        'var assemblies_misassemblies = {};\n',
        'var assemblies_n50 = {};\n',
    ]
    size_chunks = []

    for contigs_fpath in contigs_fpaths:
        assembly_label = qutils.label_from_fpath(contigs_fpath)
        report = reporting.get(contigs_fpath)
        total_len = report.get_field(reporting.Fields.TOTALLEN)
        contigs = report.get_field(reporting.Fields.CONTIGS)
        n50 = report.get_field(reporting.Fields.N50)

        if stdout_pattern:
            # Link to the assembly's stdout log, relative to the Icarus directory.
            contig_stdout_fpath = stdout_pattern % qutils.label_from_fpath_for_fname(contigs_fpath) + '.stdout'
            contig_stdout_fpath = qutils.relpath(contig_stdout_fpath, icarus_dirpath)
            data_chunks.append('assemblies_links["' + assembly_label + '"] = "' + contig_stdout_fpath + '";\n')

        size_chunks.append('assemblies_len["' + assembly_label + '"] = ' + str(total_len) + ';\n')
        size_chunks.append('assemblies_contigs["' + assembly_label + '"] = ' + str(contigs) + ';\n')
        # N50 is emitted as a quoted string, unlike the two numeric values above.
        size_chunks.append('assemblies_n50["' + assembly_label + '"] = "' + str(n50) + '";\n')

        for nx in nx_marks:
            assemblies_n50[assembly_label][nx] = report.get_field(nx)

    assemblies_data = ''.join(data_chunks)
    assemblies_contig_size_data = ''.join(size_chunks)
    return assemblies_data, assemblies_contig_size_data, assemblies_n50
示例6: get_color_and_ls
def get_color_and_ls(fpath, label=None):
    """Return the (color, line style) entry registered for an assembly.

    The entry is looked up by label in the module-level
    ``dict_color_and_ls``. When ``label`` is empty or omitted it is derived
    from ``fpath`` via ``qutils.label_from_fpath``.

    Returns tuple: color, line style -- or ``(None, None)`` when
    ``dict_color_and_ls`` is empty.
    """
    # Imported at call time, as in the original.
    # NOTE(review): presumably this avoids a circular import -- confirm.
    from quast_libs import qutils
    if not label:
        label = qutils.label_from_fpath(fpath)
    if not dict_color_and_ls:
        return None, None
    return dict_color_and_ls[label]
示例7: parallel_partition_contigs
def parallel_partition_contigs(asm, assemblies_by_ref, corrected_dirpath, alignments_fpath_template):
    """Split one assembly's contigs into per-reference FASTA files.

    Reads the alignments file for ``asm`` (``alignments_fpath_template``
    formatted with the assembly's file-name label). Each line is
    whitespace-separated: the first field is a chromosome name, the rest
    are contig names. For every chromosome known to
    ``contigs_analyzer.ref_labels_by_chromosomes`` the listed contigs are
    appended to ``<corr_label>_to_<ref_name>.fasta`` in
    ``corrected_dirpath``, and an ``Assembly`` for that file is added to
    ``assemblies_by_ref[ref_name]`` (at most once, and only if the
    reference is already a key there). Contigs seen but never aligned are
    written to ``<corr_label>_not_aligned_anywhere.fasta``.

    Returns ``(assemblies_by_ref, not_aligned_asm)``.
    """
    assembly_label = qutils.label_from_fpath(asm.fpath)
    corr_assembly_label = qutils.label_from_fpath_for_fname(asm.fpath)
    logger.info(' ' + 'processing ' + assembly_label)
    added_ref_asm = []
    not_aligned_fname = corr_assembly_label + '_not_aligned_anywhere.fasta'
    not_aligned_fpath = os.path.join(corrected_dirpath, not_aligned_fname)
    contigs = {}
    aligned_contig_names = set()
    # ref_name -> set of contig names already written to that reference's FASTA
    aligned_contigs_for_each_ref = {}
    contigs_seq = fastaparser.read_fasta_one_time(asm.fpath)
    alignments_fpath = alignments_fpath_template % corr_assembly_label
    if os.path.exists(alignments_fpath):
        with open(alignments_fpath) as f:
            for line in f:
                values = line.split()
                # A blank line yields no fields; values[0] would raise IndexError.
                if not values:
                    continue
                if values[0] not in contigs_analyzer.ref_labels_by_chromosomes:
                    continue
                ref_name = contigs_analyzer.ref_labels_by_chromosomes[values[0]]
                # Sets give constant-time membership tests inside the contig loop.
                ref_contigs_names = set(values[1:])
                ref_contigs_fpath = os.path.join(
                    corrected_dirpath, corr_assembly_label + '_to_' + ref_name + '.fasta')
                written_for_ref = aligned_contigs_for_each_ref.setdefault(ref_name, set())

                for (cont_name, seq) in contigs_seq:
                    if cont_name not in contigs:
                        contigs[cont_name] = seq
                    if cont_name in ref_contigs_names and cont_name not in written_for_ref:
                        # Collecting all aligned contigs names in order to further extract not aligned
                        aligned_contig_names.add(cont_name)
                        written_for_ref.add(cont_name)
                        fastaparser.write_fasta(ref_contigs_fpath, [(cont_name, seq)], 'a')

                ref_asm = Assembly(ref_contigs_fpath, assembly_label)
                if ref_asm.name not in added_ref_asm:
                    if ref_name in assemblies_by_ref:
                        assemblies_by_ref[ref_name].append(ref_asm)
                        added_ref_asm.append(ref_asm.name)
        if qconfig.space_efficient:
            os.remove(alignments_fpath)

    # Extraction not aligned contigs
    all_contigs_names = set(contigs.keys())
    not_aligned_contigs_names = all_contigs_names - aligned_contig_names
    fastaparser.write_fasta(not_aligned_fpath, [(name, contigs[name]) for name in not_aligned_contigs_names])

    not_aligned_asm = Assembly(not_aligned_fpath, asm.label)
    return assemblies_by_ref, not_aligned_asm
示例8: do
def do(contigs_fpaths, gene_lengths, out_dirpath):
    """Run GlimmerHMM gene prediction on every assembly and record the results.

    ``predict_genes`` is called once per entry of ``contigs_fpaths``
    (through joblib when ``qconfig.memory_efficient`` is set, otherwise
    sequentially). Each result is a 4-tuple
    ``(genes, unique, full_genes, partial_genes)``; the counts are added to
    the assembly's report. The temporary directory ``<out_dirpath>/tmp`` is
    removed afterwards unless ``qconfig.debug`` is set.

    Returns a dict mapping assembly label -> predicted genes, or ``None``
    when ``compile_glimmer`` does not yield an executable path.
    """
    logger.print_timestamp()
    logger.main_info('Running GlimmerHMM...')

    tool_dirpath = os.path.join(qconfig.LIBS_LOCATION, 'glimmer')
    tmp_dirpath = os.path.join(out_dirpath, 'tmp')
    tool_exec_fpath = compile_glimmer(logger)
    if not tool_exec_fpath:
        return

    if not os.path.isdir(out_dirpath):
        os.makedirs(out_dirpath)
    if not os.path.isdir(tmp_dirpath):
        os.makedirs(tmp_dirpath)

    n_jobs = min(len(contigs_fpaths), qconfig.max_threads)
    if is_python2():
        from joblib import Parallel, delayed
    else:
        from joblib3 import Parallel, delayed
    if qconfig.memory_efficient:
        results = Parallel(n_jobs=n_jobs)(delayed(predict_genes)(
            index, contigs_fpath, gene_lengths, out_dirpath, tool_dirpath, tmp_dirpath)
            for index, contigs_fpath in enumerate(contigs_fpaths))
    else:
        results = [predict_genes(index, contigs_fpath, gene_lengths, out_dirpath, tool_dirpath, tmp_dirpath)
                   for index, contigs_fpath in enumerate(contigs_fpaths)]

    genes_by_labels = dict()
    # saving results
    for i, contigs_fpath in enumerate(contigs_fpaths):
        report = reporting.get(contigs_fpath)
        label = qutils.label_from_fpath(contigs_fpath)
        genes_by_labels[label], unique, full_genes, partial_genes = results[i]
        if unique is not None:
            report.add_field(reporting.Fields.PREDICTED_GENES_UNIQUE, unique)
        if full_genes is not None:
            genes = ['%s + %s part' % (full_cnt, partial_cnt)
                     for full_cnt, partial_cnt in zip(full_genes, partial_genes)]
            report.add_field(reporting.Fields.PREDICTED_GENES, genes)
        if unique is None and full_genes is None:
            # `%` binds tighter than `+`: the label must be interpolated into
            # the first literal BEFORE the hint is appended. Applying `%` to
            # the hint (which has no placeholder) raised TypeError.
            hint = 'Run with the --debug option to see the command line.' if not qconfig.debug else ''
            logger.error('Glimmer failed running Glimmer for %s. ' % label + hint)

    if not qconfig.debug:
        shutil.rmtree(tmp_dirpath)

    logger.main_info('Done.')
    return genes_by_labels
示例9: correct_assemblies
def correct_assemblies(contigs_fpaths, output_dirpath, labels):
    """Correct the assembly files and wrap them into ``Assembly`` objects.

    ``qutils.correct_contigs`` is run with ``qconfig.min_contig``
    temporarily forced to 0, so contigs are corrected but not filtered by
    length. When plots or the HTML report are enabled, colors and line
    styles for the assemblies are registered through
    ``plotter_data.save_colors_and_ls``.

    Returns ``(assemblies, corrected_labels)``.
    """
    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)

    # we need correction but do not need min-contig filtration
    min_contig = qconfig.min_contig
    qconfig.min_contig = 0
    try:
        _, old_contigs_fpaths = qutils.correct_contigs(
            contigs_fpaths, corrected_dirpath, labels, reporting=None)
    finally:
        # Restore the global threshold even if correction raises, so the
        # temporary override cannot leak into later processing.
        qconfig.min_contig = min_contig

    assemblies = [Assembly(fpath, qutils.label_from_fpath(fpath)) for fpath in old_contigs_fpaths]
    corrected_labels = [asm.label for asm in assemblies]

    if qconfig.draw_plots or qconfig.html_report:
        corr_fpaths = [asm.fpath for asm in assemblies]
        # Pass a copy so the returned list is never shared with the callee.
        plotter_data.save_colors_and_ls(corr_fpaths, labels=list(corrected_labels))
    return assemblies, corrected_labels
示例10: save_colors
def save_colors(results_dirpath, contigs_fpaths, dict_colors, meta=False):
    """Store the plot colors and the broken-scaffold labels for the HTML report.

    With ``meta`` set, the ``{{ colors }}`` and ``{{ broken_scaffolds }}``
    placeholders of the existing report file are replaced in place with
    ``standard_colors`` and ``[]``. Otherwise the HTML color of each
    assembly and the labels drawn with ``secondary_line_style`` are stored
    through ``save_record``.
    """
    if not meta:
        contig_labels = [qutils.label_from_fpath(fpath) for fpath in contigs_fpaths]
        colors_and_ls = [dict_colors[contig_label] for contig_label in contig_labels]

        # Translate each plot color into the HTML color at the same index.
        colors = [entry[0] for entry in colors_and_ls]
        colors_for_html = [html_colors[plotter_data.colors.index(color)] for color in colors]
        save_record(results_dirpath, 'colors', colors_for_html)

        broken_contig_names = [
            contig_label
            for contig_label, entry in zip(contig_labels, colors_and_ls)
            if entry[1] == secondary_line_style
        ]
        save_record(results_dirpath, 'broken_scaffolds', broken_contig_names)
        return

    html_fpath = os.path.join(results_dirpath, report_fname)
    with open(html_fpath) as f_html:
        html_text = f_html.read()
    html_text = re.sub('{{ ' + 'colors' + ' }}', 'standard_colors', html_text)
    html_text = re.sub('{{ ' + 'broken_scaffolds' + ' }}', '[]', html_text)
    with open(html_fpath, 'w') as f_html:
        f_html.write(html_text)
示例11: calculate_ave_read_support
def calculate_ave_read_support(combined_output_dirpath, assemblies):
    """Add the length-weighted average read coverage to each per-reference report.

    For every assembly the unique-contigs file (``contigs_reports`` /
    ``qconfig.unique_contigs_fname_pattern`` formatted with the assembly's
    file-name label) is read. Each non-empty line holds three tab-separated
    fields: reference name, aligned length, coverage. The coverage of a
    reference is ``sum(coverage * length) / sum(length)`` over its contigs
    and is stored, formatted with two decimals, in the
    ``AVE_READ_SUPPORT`` field of the report for
    ``<corr_label>_to_<ref_name>.fasta``.
    """
    unique_contigs_fpath = os.path.join(
        combined_output_dirpath, 'contigs_reports', qconfig.unique_contigs_fname_pattern)
    for assembly in assemblies:
        aligned_contigs_by_ref = dict()
        assembly_label = qutils.label_from_fpath(assembly.fpath)
        corr_assembly_label = qutils.label_from_fpath_for_fname(assembly.fpath)
        with open(unique_contigs_fpath % corr_assembly_label) as in_f:
            for line in in_f:
                line = line.strip()
                # A blank (e.g. trailing) line would break the 3-way unpack below.
                if not line:
                    continue
                ref_name, contig_len, contig_cov = line.split('\t')
                aligned_contigs_by_ref.setdefault(ref_name, []).append((float(contig_len), float(contig_cov)))

        # Loop-invariant for this assembly: computed once instead of per reference.
        assembly_dirpath = os.path.dirname(assembly.fpath)
        for ref_name, contigs in aligned_contigs_by_ref.items():
            ref_cov = sum(contig_cov * aligned_len for (aligned_len, contig_cov) in contigs)
            ref_cov /= sum(aligned_len for (aligned_len, contig_cov) in contigs)
            ref_contigs_fpath = os.path.join(
                assembly_dirpath, corr_assembly_label + '_to_' + ref_name + '.fasta')
            qconfig.assembly_labels_by_fpath[ref_contigs_fpath] = assembly_label
            report = reporting.get(ref_contigs_fpath, ref_name=ref_name)
            report.add_field(reporting.Fields.AVE_READ_SUPPORT, '%.2f' % ref_cov)
示例12: do
def do(contigs_fpaths, gene_lengths, out_dirpath):
    """Run GlimmerHMM gene prediction on every assembly via ``run_parallel``.

    ``run_parallel`` is expected to return four sequences indexed like
    ``contigs_fpaths``: predicted genes, unique counts, full-gene counts
    and partial-gene counts. The counts are added to each assembly's
    report. ``<out_dirpath>/tmp`` is removed afterwards unless
    ``qconfig.debug`` is set.

    Returns a dict mapping assembly label -> predicted genes, or ``None``
    when ``compile_glimmer`` does not yield an executable path.
    """
    logger.print_timestamp()
    logger.main_info('Running GlimmerHMM...')

    tool_dirpath = os.path.join(qconfig.LIBS_LOCATION, 'glimmer')
    tmp_dirpath = os.path.join(out_dirpath, 'tmp')
    tool_exec_fpath = compile_glimmer(logger)
    if not tool_exec_fpath:
        return

    for dirpath in (out_dirpath, tmp_dirpath):
        if not os.path.isdir(dirpath):
            os.makedirs(dirpath)

    n_jobs = min(len(contigs_fpaths), qconfig.max_threads)
    parallel_args = []
    for index, contigs_fpath in enumerate(contigs_fpaths):
        parallel_args.append(
            (index, contigs_fpath, gene_lengths, out_dirpath, tool_dirpath, tool_exec_fpath, tmp_dirpath))
    genes_list, unique, full_genes, partial_genes = run_parallel(predict_genes, parallel_args, n_jobs)

    genes_by_labels = {}
    # saving results
    for idx, contigs_fpath in enumerate(contigs_fpaths):
        report = reporting.get(contigs_fpath)
        label = qutils.label_from_fpath(contigs_fpath)
        genes_by_labels[label] = genes_list[idx]

        unique_cnt = unique[idx]
        if unique_cnt is not None:
            report.add_field(reporting.Fields.PREDICTED_GENES_UNIQUE, unique_cnt)

        full_cnts = full_genes[idx]
        if full_cnts is not None:
            genes = []
            for full_cnt, partial_cnt in zip(full_cnts, partial_genes[idx]):
                genes.append('%s + %s part' % (full_cnt, partial_cnt))
            report.add_field(reporting.Fields.PREDICTED_GENES, genes)

        if unique_cnt is None and full_cnts is None:
            if not qconfig.debug:
                hint = 'Run with the --debug option to see the command line.'
            else:
                hint = ''
            logger.error('Failed running Glimmer for %s. ' % label + hint)

    if not qconfig.debug:
        shutil.rmtree(tmp_dirpath)

    logger.main_info('Done.')
    return genes_by_labels
示例13: save_total_report
def save_total_report(output_dirpath, min_contig, ref_fpath):
    """Save the complete report table, plus per-reference tables if available.

    Per-reference sub-reports are built only when
    ``qconfig.is_combined_ref`` is set and ``ref_labels_by_chromosomes`` is
    non-empty; their reference names are de-duplicated and sorted.

    Returns whatever ``save`` returns.
    """
    from quast_libs import reporting

    asm_names = [qutils.label_from_fpath(fpath) for fpath in reporting.assembly_fpaths]
    report = reporting.table(reporting.Fields.grouped_order)

    subreports = []
    ref_names = []
    if qconfig.is_combined_ref and ref_labels_by_chromosomes:
        ref_names = sorted(set(ref_labels_by_chromosomes.values()))
        for ref_name in ref_names:
            subreports.append(reporting.table(reporting.Fields.grouped_order, ref_name=ref_name))

    now = datetime.datetime.now()
    record = {
        'date': now.strftime('%d %B %Y, %A, %H:%M:%S'),
        'assembliesNames': asm_names,
        'referenceName': qutils.name_from_fpath(ref_fpath) if ref_fpath else '',
        'order': list(range(len(asm_names))),
        'report': report,
        'subreferences': ref_names,
        'subreports': subreports,
        'minContig': min_contig,
    }
    return save(join(output_dirpath, total_report_fname), record)
示例14: main
def main(args):
    """Entry point of MetaQUAST (only the beginning is present in this excerpt).

    ``args`` is the list of command-line arguments without the program
    name. The visible part parses the options, prepares the output and
    corrected-files directories, corrects references and assemblies, tries
    to find/download references when none were given, and falls back to a
    regular QUAST run (then exits with code 0) when there are still no
    references.

    Returns 4 when none of the assembly files contains correct contigs.
    Exits with code 1 when ``args`` is empty.
    """
    check_dirpath(qconfig.QUAST_HOME, 'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '.\n' +
                  'Please, put QUAST in a different directory, then try again.\n', exit_code=3)

    if not args:
        qconfig.usage(stream=sys.stderr)
        sys.exit(1)

    metaquast_path = [os.path.realpath(__file__)]
    quast_py_args, contigs_fpaths = parse_options(logger, metaquast_path + args)
    output_dirpath, ref_fpaths, labels = qconfig.output_dirpath, qconfig.reference, qconfig.labels
    html_report = qconfig.html_report
    test_mode = qconfig.test

    # Directories
    output_dirpath, _, _ = qutils.set_up_output_dir(
        output_dirpath, None, not output_dirpath,
        save_json=False)

    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)

    qconfig.set_max_threads(logger)
    qutils.logger = logger

    ########################################################################

    from quast_libs import reporting
    # importlib.reload exists on Python 3.4+; older interpreters (incl.
    # Python 2.7) only offer imp.reload. The previous bare `except:` fell
    # back to the Python-2-only builtin `reload`, which raises NameError on
    # Python 3 versions that no longer ship `imp`.
    try:
        from importlib import reload as reload_module
    except ImportError:
        from imp import reload as reload_module
    reload_module(reporting)
    from quast_libs import plotter

    if os.path.isdir(corrected_dirpath):
        shutil.rmtree(corrected_dirpath)
    os.mkdir(corrected_dirpath)

    # PROCESSING REFERENCES
    if ref_fpaths:
        logger.main_info()
        logger.main_info('Reference(s):')

        corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names =\
            correct_meta_references(ref_fpaths, corrected_dirpath)

    # PROCESSING CONTIGS
    logger.main_info()
    logger.main_info('Contigs:')
    qconfig.no_check_meta = True
    assemblies, labels = correct_assemblies(contigs_fpaths, output_dirpath, labels)
    if not assemblies:
        logger.error("None of the assembly files contains correct contigs. "
                     "Please, provide different files or decrease --min-contig threshold.")
        return 4

    # Running QUAST(s)
    if qconfig.gene_finding:
        quast_py_args += ['--mgm']
    if qconfig.min_IDY is None:  # special case: user not specified min-IDY, so we need to use MetaQUAST default value
        quast_py_args += ['--min-identity', str(qconfig.META_MIN_IDY)]

    downloaded_refs = False

    # SEARCHING REFERENCES
    if not ref_fpaths:
        logger.main_info()
        if qconfig.max_references == 0:
            logger.notice("Maximum number of references (--max-ref-number) is set to 0, search in SILVA 16S rRNA database is disabled")
        else:
            if qconfig.references_txt:
                logger.main_info("List of references was provided, starting to download reference genomes from NCBI...")
            else:
                logger.main_info("No references are provided, starting to search for reference genomes in SILVA 16S rRNA database "
                                 "and to download them from NCBI...")
            downloaded_dirpath = os.path.join(output_dirpath, qconfig.downloaded_dirname)
            if not os.path.isdir(downloaded_dirpath):
                os.mkdir(downloaded_dirpath)
            corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)
            ref_fpaths = search_references_meta.do(assemblies, labels, downloaded_dirpath, corrected_dirpath, qconfig.references_txt)
            if ref_fpaths:
                search_references_meta.is_quast_first_run = True
                if not qconfig.references_txt:
                    downloaded_refs = True
                logger.main_info()
                logger.main_info('Downloaded reference(s):')
                corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names =\
                    correct_meta_references(ref_fpaths, corrected_dirpath, downloaded_refs=True)
            elif test_mode and not ref_fpaths:
                logger.error('Failed to download or setup SILVA 16S rRNA database for working without '
                             'references on metagenome datasets!', to_stderr=True, exit_with_code=4)

    if not ref_fpaths:
        # No references, running regular quast with MetaGenemark gene finder
        logger.main_info()
        logger.notice('No references are provided, starting regular QUAST with MetaGeneMark gene finder')
        assemblies = [Assembly(fpath, qutils.label_from_fpath(fpath)) for fpath in contigs_fpaths]
        _start_quast_main(quast_py_args, assemblies=assemblies, output_dirpath=output_dirpath, run_regular_quast=True)
        # sys.exit instead of the `site`-provided `exit` helper, which is
        # not guaranteed to exist (e.g. under `python -S`).
        sys.exit(0)
    # ......... part of the code is omitted here .........
示例15: save_combined_ref_stats
def save_combined_ref_stats(results, contigs_fpaths, ref_labels_by_chromosomes, output_dir, logger):
    """Save interspecies-translocation statistics of a combined-reference run.

    For every assembly the per-reference misassemblies are stored in the
    module-level ``intergenomic_misassemblies_by_asm``. When translocation
    data is available for the assembly, a reference-by-reference table is
    written to ``interspecies_translocations_by_refs_<name>.info`` in
    ``output_dir``. If ``qconfig.draw_plots`` is set, a summary plot of the
    per-reference totals (translocations plus possible misassemblies) is
    drawn as well.

    ``results`` holds one entry per assembly: either a falsy value or a
    mapping with the keys ``'istranslocations_by_refs'`` and
    ``'misassemblies_by_ref'``.
    """
    istranslocations_by_asm = []
    misassemblies_by_asm = []
    for result in results:
        istranslocations_by_asm.append(result['istranslocations_by_refs'] if result else None)
        misassemblies_by_asm.append(result['misassemblies_by_ref'] if result else None)

    # Unique reference names in first-seen order (sorted unless input order is requested).
    all_refs = []
    for ref in ref_labels_by_chromosomes.values():
        if ref not in all_refs:
            all_refs.append(ref)
    if not qconfig.use_input_ref_order:
        all_refs.sort()

    summary_rows = [{'metricName': 'References', 'values': all_refs}]
    if not istranslocations_by_asm:
        return

    for asm_idx, fpath in enumerate(contigs_fpaths):
        label = qutils.label_from_fpath(fpath)
        asm_row = {'metricName': label, 'values': []}
        summary_rows.append(asm_row)
        istranslocations_by_ref = istranslocations_by_asm[asm_idx]

        intergenomic_misassemblies_by_asm[label] = defaultdict(list)
        for ref in all_refs:
            if misassemblies_by_asm[asm_idx]:
                intergenomic_misassemblies_by_asm[label][ref] = misassemblies_by_asm[asm_idx][ref]
            else:
                intergenomic_misassemblies_by_asm[label][ref] = []

        if not istranslocations_by_ref:
            continue

        assembly_name = qutils.name_from_fpath(fpath)
        all_rows = [{'metricName': 'References',
                     'values': [ref_num + 1 for ref_num in range(len(all_refs))]}]
        for ref in all_refs:
            ref_row = {'metricName': ref, 'values': []}
            for second_ref in all_refs:
                if ref == second_ref or second_ref not in istranslocations_by_ref:
                    ref_row['values'].append(None)
                else:
                    ref_row['values'].append(istranslocations_by_ref[ref][second_ref])

            possible_misassemblies = 0
            misassemblies_by_ref = misassemblies_by_asm[asm_idx]
            if misassemblies_by_ref:
                possible_misassemblies = misassemblies_by_ref[ref].count(Misassembly.POSSIBLE_MISASSEMBLIES)
            istranslocations = max(0, sum([value for value in ref_row['values'] if value]))
            # asm_row is the row appended for this assembly at the top of the loop.
            asm_row['values'].append(istranslocations + possible_misassemblies)
            all_rows.append(ref_row)

        misassembly_by_ref_fpath = os.path.join(
            output_dir, 'interspecies_translocations_by_refs_%s.info' % assembly_name)
        # Header first, then the table (appended by print_file), then the legend.
        with open(misassembly_by_ref_fpath, 'w') as out_f:
            out_f.write('Number of interspecies translocations by references: \n')
        print_file(all_rows, misassembly_by_ref_fpath, append_to_existing_file=True)

        with open(misassembly_by_ref_fpath, 'a') as out_f:
            out_f.write('References:\n')
            for ref_num, ref in enumerate(all_refs):
                out_f.write(str(ref_num + 1) + ' - ' + ref + '\n')
        logger.info(' Information about interspecies translocations by references for %s is saved to %s' %
                    (assembly_name, misassembly_by_ref_fpath))

    if qconfig.draw_plots:
        from quast_libs import plotter

        aligned_contigs_labels = []
        # Iterate over a copy (the slice) because empty rows are removed on the fly.
        for row in summary_rows[1:]:
            if row['values']:
                aligned_contigs_labels.append(row['metricName'])
            else:
                summary_rows.remove(row)

        # Transpose: one list per reference holding the value of each remaining assembly.
        misassemblies = [
            [row['values'][ref_idx] for row in summary_rows[1:] if row['values']]
            for ref_idx in range(len(all_refs))
        ]
        is_translocations_plot_fpath = os.path.join(output_dir, 'intergenomic_misassemblies')
        plotter.draw_meta_summary_plot('', output_dir, aligned_contigs_labels, all_refs,
                                       misassemblies, is_translocations_plot_fpath,
                                       title='Intergenomic misassemblies (found and supposed)', reverse=False,
                                       yaxis_title=None, print_all_refs=True, logger=logger)