This page collects and summarizes typical usage examples of the Python method pybedtools.BedTool.saveas. If you are unsure what exactly BedTool.saveas does, or how and where to use it, the curated code examples below may help. You can also read further about its containing class, pybedtools.BedTool.
Below, 13 code examples of BedTool.saveas are shown, sorted by popularity by default.
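As a quick orientation before the examples: saveas() writes a BedTool's intervals to a file (optionally prepending a track line) and returns a new BedTool backed by that file; called with no filename, it writes to a temporary file. A minimal sketch, with an illustrative path and trackline that are not taken from the examples below:

    from pybedtools import BedTool

    # Build a small BedTool from a string, purely for demonstration.
    bed = BedTool('chr1\t100\t200\nchr1\t150\t250\nchr2\t10\t60\n', from_string=True)

    # Write to a named file with a trackline; 'saved' is a new BedTool
    # whose .fn attribute is the path just written.
    saved = bed.saveas('example.bed', trackline="track name='example'")
    print(saved.fn)

    # With no argument, saveas() writes to a temp file -- handy for
    # materializing a streaming result such as the output of filter().
    tmp = bed.merge().saveas()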
Example 1: clean_bed
# Module to import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import saveas [as alias]
def clean_bed(bed_fpath, work_dir):
    clean_fpath = intermediate_fname(work_dir, bed_fpath, 'clean')

    if not can_reuse(clean_fpath, bed_fpath):
        pybedtools.set_tempdir(safe_mkdir(join(work_dir, 'pybedtools_tmp')))
        bed = BedTool(bed_fpath)
        bed = bed.filter(lambda x: x.chrom and
                         not any(x.chrom.startswith(e) for e in ['#', ' ', 'track', 'browser']))
        bed = bed.remove_invalid()
        with file_transaction(work_dir, clean_fpath) as tx_out_file:
            bed.saveas(tx_out_file)
        verify_bed(clean_fpath, is_critical=True)
        debug('Saved clean BED file into ' + clean_fpath)

    return clean_fpath
Example 2: compare_bed
# Module to import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import saveas [as alias]
def compare_bed(self, bed_file, orig_from_string, pp_bed):
    d = '/home/bioinfo/Natalie/wc/genes/compare_beds/'
    original_bed = BedTool(bed_file, from_string=orig_from_string)
    original_name = os.path.basename(bed_file)
    saveas = d + original_name
    original_bed.saveas(saveas)
    saveas_pp = d + 'pp_' + original_name
    pp_bed.saveas(saveas_pp)
    missing_from_pp = original_bed.subtract(pp_bed)
    print('Missing from exported BED')
    print(missing_from_pp)
    saveas_missing_pp = d + 'missing_pp_' + original_name
    missing_from_pp.saveas(saveas_missing_pp)
    missing_from_orig = pp_bed.subtract(original_bed)
    print('Missing from original BED')
    print(missing_from_orig)
    saveas_missing_orig = d + 'missing_orig_' + original_name
    missing_from_orig.saveas(saveas_missing_orig)
Example 3: transcripts_list_to_bed6
# Module to import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import saveas [as alias]
def transcripts_list_to_bed6(self, file_name=None, save_in_file=False):
    bed6_trans = [trans_to_b6.get_bed6() for trans_to_b6 in self.transcripts_list()]
    # some filtering system should go here
    bed6_trans = BedTool('\n'.join(bed6_trans), from_string=True).sort()
    if not save_in_file:
        return bed6_trans
    else:
        if file_name:
            return bed6_trans.saveas(fn=file_name, trackline="track name='Transcripts {}' color=128,0,0".format(file_name.split('/')[-1]))
        else:
            raise IOError('\nfile_name needs a file name or a complete file path including the name.\n')
Example 4: generate_remainder
# Module to import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import saveas [as alias]
def generate_remainder(whole_bed, bed_prefix, bed_list):
    """
    Calculate the remaining regions that are not included in the truth set
    :param whole_bed: path to the truth regions for the whole panel
    :param bed_prefix: prefix used for all the bed files
    :param bed_list: list of all the bed files for that panel
    :return: BedTool containing any regions that are completely missing from the truth regions
    """
    whole_truth = BedTool(whole_bed)
    whole_truth.saveas()
    whole = BedTool()
    for bed in bed_list:
        print(bed)
        tool = BedTool(bed)
        tool.saveas()
        if bed == bed_list[0]:
            whole = tool
        else:
            whole = whole.cat(tool)
        whole.saveas()
    whole_sorted = whole.sort()
    whole_merged = whole_sorted.merge()
    whole_merged.saveas()
    remainder = whole_merged.subtract(whole_truth)
    remainder.moveto('/results/Analysis/MiSeq/MasterBED/GIAB/' + bed_prefix + '.remainder.bed')
    missing_regions = whole_merged.subtract(whole_truth, A=True)
    return missing_regions
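A note on the two save styles used above, following the pybedtools documentation: saveas() copies the intervals to the destination (optionally adding a trackline), while moveto() just moves the underlying file without rewriting it, so it is quicker when the result is already in its final form. An illustrative contrast (file names are hypothetical):

    result = BedTool('regions.bed').sort().merge()  # result lives in a temp file
    copy = result.saveas('panel.copy.bed')    # rewrites the intervals
    final = result.moveto('panel.final.bed')  # moves the temp file, no copy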
Example 5: gene_list_to_bed6
# Module to import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import saveas [as alias]
def gene_list_to_bed6(self, file_name=None, save_in_file=False):
    """
    Returns:
        BedTool: BedTool object with gene_ids inside,
        or saves a ".bed" file to the path given in the file_name parameter.
    """
    bed6_genes = [gene_to_b6.get_bed6() for gene_to_b6 in self.gene_list()]
    # some filtering system should go here
    bed6_genes = BedTool('\n'.join(bed6_genes), from_string=True).sort()
    if not save_in_file:
        return bed6_genes
    else:
        if file_name:
            return bed6_genes.saveas(fn=file_name, trackline="track name='Genes {}' color=128,0,0".format(
                file_name.split('/')[-1]))
        else:
            raise IOError('\nfile_name needs a file name or a complete file path including the name.\n')
Example 6: generate_remainder
# Module to import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import saveas [as alias]
def generate_remainder(whole_bed, out_dir, bed_list):
    """
    Calculate the remaining regions that are not included in the truth set
    :param whole_bed: Path to the truth regions for the whole panel
    :type whole_bed: String
    :param out_dir: Output directory for the result files
    :type out_dir: String
    :param bed_list: List of all the bed files for that panel
    :type bed_list: List of String
    :return: BedTool containing any regions that are completely missing from the truth regions
    :rtype: BedTool
    """
    try:
        whole_truth = BedTool(whole_bed)
        whole_truth.saveas()
        whole = BedTool()
        for bed in bed_list:
            print(bed)
            tool = BedTool(bed)
            tool.saveas()
            if bed == bed_list[0]:
                whole = tool
            else:
                whole = whole.cat(tool)
            whole.saveas()
        whole_sorted = whole.sort()
        whole_merged = whole_sorted.merge()
        whole_merged.saveas()
        remainder = whole_merged.subtract(whole_truth)
        remainder.moveto(out_dir + '/remainder.bed')
        missing_regions = whole_merged.subtract(whole_truth, A=True)
    except UnicodeDecodeError:
        missing_regions = None
    return missing_regions
Example 7: print_split_sort_bed
# Module to import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import saveas [as alias]
def print_split_sort_bed(self):
    split_num = 100
    split_region_list = [[] * 5]
    print(split_region_list)
    bed = BedTool('/Users/huangzhibo/workitems/10.testData/testPlatformTJ/bed/test.bed')
    bed = BedTool(bed.sort().merge().window_maker(b=bed.fn, w=100))
    bed.all_hits()
    # x = BedTool().window_maker(genome='hg38', w=1000000)
    bed.saveas('/Users/huangzhibo/workitems/10.testData/testPlatformTJ/bed/test_w100.bed')
    split_num = bed.count() if bed.count() < split_num else split_num
    print(bed.count() / split_num)
    # print(bed.split(10, 'out'))
    # print(x)
    n = 0
    for region in bed:
        # print(region.length)
        print(str(region).strip())
        n += 1
Example 8: Dataset
# Module to import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import saveas [as alias]
d = Dataset(args.dataset)
A = SnpSubset(d, GenomicSubset(args.subset).bedtool)
if args.path_to_R is not None:
    R = pickle.load(open(args.path_to_R))
else:
    R = None

newA = IntRangeSet()
for r in A.expanded_by(0.003).irs.ranges():
    S = IntRangeSet([a - r[0] for a in A.irs & IntRangeSet(r)])
    print(r, 'analyzing', len(S), 'snps')
    if R is None:
        X = d.get_standardized_genotypes(r)
        cov = X.T.dot(X) / d.N
    else:
        cov = R.ranges_to_arrays[r]
    while True:
        new = get_high_ld_snps(S, cov)
        if len(new) == 0:
            break
        else:
            print('\tadding', len(new), 'snps')
            print('\t\tbefore', S)
            S += new
            print('\t\tafter', S)
    newA += IntRangeSet([s + r[0] for s in S])

b = BedTool([interval_from_range(r) for r in newA.ranges()])
print(b)
b.saveas(paths.genome_subsets + args.subset + '.R2ge{:0.2}.bed'.format(args.R2_threshold))
Example 9: annotate
# Module to import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import saveas [as alias]
def annotate(input_bed_fpath, output_fpath, work_dir, genome=None,
             reannotate=True, high_confidence=False, only_canonical=False,
             coding_only=False, short=False, extended=False, is_debug=False, **kwargs):
    debug('Getting features from storage')
    features_bed = ba.get_all_features(genome)
    if features_bed is None:
        critical('Genome ' + genome + ' is not supported. Supported: ' + ', '.join(ba.SUPPORTED_GENOMES))

    if genome:
        fai_fpath = reference_data.get_fai(genome)
        chr_order = reference_data.get_chrom_order(genome)
    else:
        fai_fpath = None
        chr_order = bed_chrom_order(input_bed_fpath)

    input_bed_fpath = sort_bed(input_bed_fpath, work_dir=work_dir, chr_order=chr_order, genome=genome)
    ori_bed = BedTool(input_bed_fpath)
    ori_col_num = ori_bed.field_count()
    reannotate = reannotate or ori_col_num == 3
    pybedtools.set_tempdir(safe_mkdir(join(work_dir, 'bedtools')))
    ori_bed = BedTool(input_bed_fpath)

    if high_confidence:
        features_bed = features_bed.filter(ba.high_confidence_filter)
    if only_canonical:
        features_bed = features_bed.filter(ba.get_only_canonical_filter(genome))
    if coding_only:
        features_bed = features_bed.filter(ba.protein_coding_filter)
    # unique_tx_by_gene = find_best_tx_by_gene(features_bed)

    info('Extracting features from Ensembl GTF')
    features_bed = features_bed.filter(lambda x:
                                       x[ba.BedCols.FEATURE] in ['exon', 'CDS', 'stop_codon', 'transcript'])
    # x[ebl.BedCols.ENSEMBL_ID] == unique_tx_by_gene[x[ebl.BedCols.GENE]])

    info('Overlapping regions with Ensembl data')
    if is_debug:
        ori_bed = ori_bed.saveas(join(work_dir, 'bed.bed'))
        debug(f'Saved regions to {ori_bed.fn}')
        features_bed = features_bed.saveas(join(work_dir, 'features.bed'))
        debug(f'Saved features to {features_bed.fn}')
    annotated = _annotate(ori_bed, features_bed, chr_order, fai_fpath, work_dir, ori_col_num,
                          high_confidence=False, reannotate=reannotate, is_debug=is_debug, **kwargs)

    full_header = [ba.BedCols.names[i] for i in ba.BedCols.cols]
    add_ori_extra_fields = ori_col_num > 3
    if not reannotate and ori_col_num == 4:
        add_ori_extra_fields = False  # no need to report the original gene field if we are not re-annotating

    info('Saving annotated regions...')
    total = 0
    with file_transaction(work_dir, output_fpath) as tx:
        with open(tx, 'w') as out:
            header = full_header[:6]
            if short:
                header = full_header[:4]
            if extended:
                header = full_header[:-1]
            if add_ori_extra_fields:
                header.append(full_header[-1])
            if extended:
                out.write('## ' + ba.BedCols.names[ba.BedCols.TX_OVERLAP_PERCENTAGE] +
                          ': part of region overlapping with transcripts\n')
                out.write('## ' + ba.BedCols.names[ba.BedCols.EXON_OVERLAPS_PERCENTAGE] +
                          ': part of region overlapping with exons\n')
                out.write('## ' + ba.BedCols.names[ba.BedCols.CDS_OVERLAPS_PERCENTAGE] +
                          ': part of region overlapping with protein coding regions\n')
            out.write('\t'.join(header) + '\n')
            for full_fields in annotated:
                fields = full_fields[:6]
                if short:
                    fields = full_fields[:4]
                if extended:
                    fields = full_fields[:-1]
                if add_ori_extra_fields:
                    fields.append(full_fields[-1])
                out.write('\t'.join(map(_format_field, fields)) + '\n')
                total += 1
    debug('Saved ' + str(total) + ' total annotated regions')
    return output_fpath
Example 10: _annotate
# Module to import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import saveas [as alias]
def _annotate(bed, ref_bed, chr_order, fai_fpath, work_dir, ori_col_num,
              high_confidence=False, reannotate=False, is_debug=False, **kwargs):
    # if genome:
    #     genome_fpath = cut(fai_fpath, 2, output_fpath=intermediate_fname(work_dir, fai_fpath, 'cut2'))
    #     intersection = bed.intersect(ref_bed, sorted=True, wao=True, g='<(cut -f1,2 ' + fai_fpath + ')')
    #     intersection = bed.intersect(ref_bed, sorted=True, wao=True, genome=genome.split('-')[0])
    # else:
    intersection_bed = None
    intersection_fpath = None

    pybedtools.set_tempdir(safe_mkdir(join(work_dir, 'bedtools')))
    if is_debug:
        intersection_fpath = join(work_dir, 'intersection.bed')
        if isfile(intersection_fpath):
            info('Loading from ' + intersection_fpath)
            intersection_bed = BedTool(intersection_fpath)
    if not intersection_bed:
        if count_bed_cols(fai_fpath) == 2:
            debug('Fai fields size is 2 ' + fai_fpath)
            intersection_bed = bed.intersect(ref_bed, wao=True, g=fai_fpath)
        else:
            debug('Fai fields is ' + str(count_bed_cols(fai_fpath)) + ', not 2')
            intersection_bed = bed.intersect(ref_bed, wao=True)
    if is_debug and not isfile(intersection_fpath):
        intersection_bed.saveas(intersection_fpath)
        debug('Saved intersection to ' + intersection_fpath)

    total_annotated = 0
    total_uniq_annotated = 0
    total_off_target = 0
    met = set()

    overlaps_by_tx_by_gene_by_loc = OrderedDefaultDict(lambda: OrderedDefaultDict(lambda: defaultdict(list)))
    # off_targets = list()

    expected_fields_num = ori_col_num + len(ba.BedCols.cols[:-4]) + 1
    for i, intersection_fields in enumerate(intersection_bed):
        inters_fields_list = list(intersection_fields)
        if len(inters_fields_list) < expected_fields_num:
            critical(
                f'Cannot parse the reference BED file - unexpected number of fields '
                f'({len(inters_fields_list)} in {inters_fields_list}; less than {expected_fields_num})')

        a_chr, a_start, a_end = intersection_fields[:3]
        a_extra_columns = intersection_fields[3:ori_col_num]

        overlap_fields = [None for _ in ba.BedCols.cols]
        overlap_fields[:len(intersection_fields[ori_col_num:])] = intersection_fields[ori_col_num:]

        keep_gene_column = not reannotate
        a_gene = None
        if keep_gene_column:
            a_gene = a_extra_columns[0]
        e_chr = overlap_fields[0]
        overlap_size = int(intersection_fields[-1])
        assert e_chr == '.' or a_chr == e_chr, \
            f'Error on line {i}: chromosomes don\'t match ({a_chr} vs {e_chr}). Line: {intersection_fields}'

        # fs = [None for _ in ebl.BedCols.cols]
        # fs[:3] = [a_chr, a_start, a_end]
        reg = (a_chr, int(a_start), int(a_end), tuple(a_extra_columns))

        if e_chr == '.':
            total_off_target += 1
            # off_targets.append(fs)
            overlaps_by_tx_by_gene_by_loc[reg][a_gene] = OrderedDefaultDict(list)
        else:
            # fs[3:-1] = db_feature_fields[3:-1]
            total_annotated += 1
            if (a_chr, a_start, a_end) not in met:
                total_uniq_annotated += 1
                met.add((a_chr, a_start, a_end))

            e_gene = overlap_fields[ba.BedCols.GENE] if not high_confidence else overlap_fields[ba.BedCols.HUGO]
            if keep_gene_column and e_gene != a_gene:
                overlaps_by_tx_by_gene_by_loc[reg][a_gene] = OrderedDefaultDict(list)
            else:
                transcript_id = overlap_fields[ba.BedCols.ENSEMBL_ID]
                overlaps_by_tx_by_gene_by_loc[reg][e_gene][transcript_id].append((overlap_fields, overlap_size))

    info(' Total annotated regions: ' + str(total_annotated))
    info(' Total unique annotated regions: ' + str(total_uniq_annotated))
    info(' Total off target regions: ' + str(total_off_target))
    info('Resolving ambiguities...')
    annotated = _resolve_ambiguities(overlaps_by_tx_by_gene_by_loc, chr_order, **kwargs)
    return annotated
Example 11: _make_target_bed
# Module to import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import saveas [as alias]
def _make_target_bed(self, bed_fpath, work_dir, output_dir, is_debug,
                     padding=None, fai_fpath=None, genome=None, reannotate=False):
    clean_target_bed_fpath = intermediate_fname(work_dir, bed_fpath, 'clean')
    if not can_reuse(clean_target_bed_fpath, bed_fpath):
        debug()
        debug('Cleaning target BED file...')
        bed = BedTool(bed_fpath)
        if bed.field_count() > 4:
            bed = bed.cut(range(4))
        bed = bed \
            .filter(lambda x: x.chrom and not any(x.chrom.startswith(e) for e in ['#', ' ', 'track', 'browser'])) \
            .remove_invalid()
        with file_transaction(work_dir, clean_target_bed_fpath) as tx:
            bed.saveas(tx)
        debug('Saved to ' + clean_target_bed_fpath)
        verify_file(clean_target_bed_fpath, is_critical=True)

    sort_target_bed_fpath = intermediate_fname(work_dir, clean_target_bed_fpath, 'sorted')
    if not can_reuse(sort_target_bed_fpath, clean_target_bed_fpath):
        debug()
        debug('Sorting target BED file...')
        sort_target_bed_fpath = sort_bed(clean_target_bed_fpath, output_bed_fpath=sort_target_bed_fpath, fai_fpath=fai_fpath)
        debug('Saved to ' + sort_target_bed_fpath)
        verify_file(sort_target_bed_fpath, is_critical=True)

    if genome in ebl.SUPPORTED_GENOMES:
        ann_target_bed_fpath = intermediate_fname(work_dir, sort_target_bed_fpath, 'ann_plus_features')
        if not can_reuse(ann_target_bed_fpath, sort_target_bed_fpath):
            debug()
            if BedTool(sort_target_bed_fpath).field_count() == 3 or reannotate:
                debug('Annotating target BED file and collecting overlapping genome features')
                overlap_with_features(sort_target_bed_fpath, ann_target_bed_fpath, work_dir=work_dir,
                                      genome=genome, extended=True, reannotate=reannotate, only_canonical=True)
            else:
                debug('Overlapping with genomic features:')
                overlap_with_features(sort_target_bed_fpath, ann_target_bed_fpath, work_dir=work_dir,
                                      genome=genome, extended=True, only_canonical=True)
            debug('Saved to ' + ann_target_bed_fpath)
            verify_file(ann_target_bed_fpath, is_critical=True)
    else:
        ann_target_bed_fpath = sort_target_bed_fpath

    final_clean_target_bed_fpath = intermediate_fname(work_dir, ann_target_bed_fpath, 'clean')
    if not can_reuse(final_clean_target_bed_fpath, ann_target_bed_fpath):
        bed = BedTool(ann_target_bed_fpath).remove_invalid()
        with file_transaction(work_dir, final_clean_target_bed_fpath) as tx:
            bed.saveas(tx)
    verify_file(final_clean_target_bed_fpath, is_critical=True)

    self.bed_fpath = final_clean_target_bed_fpath
    self.bed = BedTool(self.bed_fpath)

    self.capture_bed_fpath = add_suffix(join(output_dir, basename(bed_fpath)), 'clean_sorted_ann')
    if not can_reuse(self.capture_bed_fpath, self.bed_fpath):
        with file_transaction(work_dir, self.capture_bed_fpath) as tx:
            self.get_capture_bed().saveas(tx)

    gene_key_set, gene_key_list = get_genes_from_bed(bed_fpath)
    self.gene_keys_set = gene_key_set
    self.gene_keys_list = gene_key_list
    self.regions_num = self.get_capture_bed().count()

    self._make_qualimap_bed(work_dir)
    if padding:
        self._make_padded_bed(work_dir, fai_fpath, padding)
Example 12: get_coords
# Module to import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import saveas [as alias]
def get_coords(self, df_primers):
    """Generates csv file for virtual PCR and imports results into a pandas data frame.

    :param df_primers: data frame of primer data.
    :return df_coords: data frame with chromosome, start and end coordinates, and a name
        (format "Gene_ExonDirection") for each primer.
    """
    primer_list = []
    names_dup = []
    names = []
    exons = []
    dirs = []
    start_coords = []
    end_coords = []
    chroms = []
    seq_position = 0
    list_position = 0
    primer_seqs = pd.DataFrame([])
    csv = '%s.csv' % self.excel_file[:-5]
    csv = csv.replace(" ", "")
    # (1) Gets sequences, exons and directions, splits the sequences into F+R and combines into series and then csv.
    for row_index, row in df_primers.iterrows():
        primer_list.append(str(row['Primer_seq']))
        names_dup.append(str(row['Gene']) + '_' + str(row['Exon']) + str(row['Direction']))
        exons.append(str(row['Exon']))
        dirs.append(str(row['Direction']))
    for item in names_dup:
        if item not in names:
            names.append(item)
    forwards = primer_list[::2]
    reverses = primer_list[1::2]
    while list_position < len(forwards):
        ser = pd.Series([names[list_position], forwards[list_position], reverses[list_position]])
        primer_seqs = primer_seqs.append(ser, ignore_index=True)
        list_position += 1
    primer_seqs.to_csv(csv, header=None, index=None, sep='\t')
    # (2) Runs virtual PCR on generated csv.
    bedfile = self.run_pcr(csv)
    tool = BedTool(bedfile)
    # (3) Uses results to calculate start and end position of each primer (results give PCR product). Adds to df.
    for row in tool:
        chroms.append(row.chrom)
        start_coords.append(row.start)
        end_coords.append(row.start + len(primer_list[seq_position]))
        chroms.append(row.chrom)
        end_coords.append(row.end)
        start_coords.append(row.end - len(primer_list[seq_position + 1]))
        seq_position += 1
    df_coords = pd.DataFrame([])
    df_coords.insert(0, 'chrom', chroms)
    df_coords.insert(1, 'start', start_coords)
    df_coords.insert(2, 'end', end_coords)
    df_coords.insert(3, 'name', names)
    # (4) Generates a bed file from df_coords (not currently used in application).
    bed = os.path.splitext(bedfile)[0]
    df_coords.to_csv('%s.csv' % bed, header=None, index=None, sep='\t')  # cannot directly convert to bed.
    csv_file = BedTool('%s.csv' % bed)
    csv_file.saveas('%s.bed' % bed)
    df_coords.insert(4, 'Exon', exons)  # not needed in bed file so added after.
    df_coords.insert(5, 'Direction', dirs)
    # Removes unnecessary files and moves BED file into shared folder. (add /tests for unit testing)
    os.system("rm /home/cuser/PycharmProjects/django_apps/mysite/%s.csv" % bed)
    os.system("mv /home/cuser/PycharmProjects/django_apps/mysite/%s.bed /media/sf_sarah_share/bedfiles" % bed)
    os.system("rm /home/cuser/PycharmProjects/django_apps/mysite/%s" % csv)
    return df_coords
Example 13: str
# Module to import: from pybedtools import BedTool [as alias]
# Or: from pybedtools.BedTool import saveas [as alias]
c1 = 0  # total sequence
c2 = 0  # coverage sequence
for line in str(a).split('\n'):  # .saveas('CoverageSmalltoLarge_'+species)
    if line:
        c1 += int(line.split('\t')[3]) - int(line.split('\t')[2])
        c2 += int(line.split('\t')[-1])
f.write(species + '\n[Small/large]: ' + str(float(c2) / float(c1) * 100.) + '\n')
c1 = 0
c2 = 0
# np.vectorize()(str(BedTool('./27Species/'+species).coverage(BedTool('./20Species/'+species))))#.saveas('CoverageLargetoSmall_'+species)
for line in str(b).split('\n'):  # .saveas('CoverageSmalltoLarge_'+species)
    if line:
        c1 += int(line.split('\t')[3]) - int(line.split('\t')[2])
        c2 += int(line.split('\t')[-1])
f.write('[Large/small]: ' + str(float(c2) / float(c1) * 100.) + '\n\n')
a.saveas('CoverageSmalltoLarge_' + species)
b.saveas('CoverageLargetoSmall_' + species)
"""
for species in listSubsetSpeciesFiles:
    a = BedTool('./20Species/' + species).coverage(BedTool('./27Species/' + species))
    b = BedTool('./27Species/'+species).coverage(BedTool('./20Species/'+species))
    c1 = 0  # total sequence
    c2 = 0  # coverage sequence
    for line in str(a).split('\n'):  # .saveas('CoverageSmalltoLarge_'+species)
        if line:
            c1 += int(line.split('\t')[-2])
            c2 += int(line.split('\t')[-3])
    f.write(species+'\n[Small/large]: '+str(float(c2)/float(c1)*100.)+'\n')
    c1 = 0
    c2 = 0
    # np.vectorize()(str(BedTool('./27Species/'+species).coverage(BedTool('./20Species/'+species))))#.saveas('CoverageLargetoSmall_'+species)