当前位置: 首页>>代码示例>>Python>>正文


Python pysam.FastaFile方法代码示例

本文整理汇总了Python中pysam.FastaFile方法的典型用法代码示例。如果您正苦于以下问题:Python pysam.FastaFile方法的具体用法?Python pysam.FastaFile怎么用?Python pysam.FastaFile使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pysam的用法示例。


在下文中一共展示了pysam.FastaFile方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_data

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def get_data(fasta_file, region=None):
    """Return FASTA content: the whole file, or the sequence of one region.

    Args:
        fasta_file: Path to the FASTA file.
        region: ``None`` to read the whole file. Otherwise either a string,
            which is parsed with ``regions.parse_region``, or a mapping with
            the keys ``"chr"``, ``"start"`` and ``"end"``.

    Returns:
        The complete text of ``fasta_file`` when ``region`` is ``None``;
        otherwise the value returned by ``pysam.FastaFile.fetch`` for the
        requested region.
    """
    if region is None:
        with open(fasta_file, "r") as f:
            return f.read()

    if isinstance(region, str):
        region = regions.parse_region(region)

    contig = region["chr"]
    # The start is shifted down by one before fetching -- presumably a
    # conversion from a 1-based start to pysam's 0-based coordinates
    # (TODO confirm against regions.parse_region).
    start = region["start"] - 1
    end = region["end"]

    fasta = pysam.FastaFile(fasta_file)
    try:
        return fasta.fetch(contig, start, end)
    finally:
        # Release the handle even when fetch raises (e.g. unknown contig).
        fasta.close()
开发者ID:igvteam,项目名称:igv-reports,代码行数:26,代码来源:fasta.py

示例2: extract_fasta_to_file

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def extract_fasta_to_file(fasta, output_dir, mode="bcolz", overwrite=False):
    """Encode every sequence of a FASTA file as an array and write it to disk.

    For each reference in the FASTA file a ``float32`` array of shape
    ``(length, NUM_SEQ_CHARS)`` is allocated, handed to
    ``one_hot_encode_sequence`` together with the fetched sequence
    (presumably filled in place -- confirm against that helper), and written
    to ``output_dir/<reference name>`` by the writer registered for ``mode``.
    A ``metadata.json`` file describing the result is written last.

    Args:
        fasta: Path to the FASTA file; also recorded as ``"source"`` in the
            metadata.
        output_dir: Directory that receives the arrays and ``metadata.json``.
        mode: Key into ``_array_writer`` selecting the array writer.
        overwrite: Forwarded to ``makedirs`` as ``exist_ok``.

    Raises:
        AssertionError: If ``mode`` is not a key of ``_array_writer``.
    """
    assert mode in _array_writer

    makedirs(output_dir, exist_ok=overwrite)
    file_shapes = {}
    fasta_file = FastaFile(fasta)
    try:
        for chrom, size in zip(fasta_file.references, fasta_file.lengths):
            data = np.zeros((size, NUM_SEQ_CHARS), dtype=np.float32)
            seq = fasta_file.fetch(chrom)
            one_hot_encode_sequence(seq, data)
            file_shapes[chrom] = data.shape
            _array_writer[mode](data, os.path.join(output_dir, chrom))
    finally:
        # The handle was previously never closed; release it even on failure.
        fasta_file.close()

    with open(os.path.join(output_dir, "metadata.json"), "w") as fp:
        json.dump(
            {
                "file_shapes": file_shapes,
                "type": "array_{}".format(mode),
                "source": fasta,
            },
            fp,
        )
开发者ID:kundajelab,项目名称:genomelake,代码行数:24,代码来源:backend.py

示例3: reverse_bed

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def reverse_bed():
    """Convert bed-file coordinates to coordinates on the reverse strand.

    Command-line entry point. Reads three positional arguments (input bed
    file, reference fasta, output bed file), looks up each contig's length
    in the reference, and writes a tab-separated file without header whose
    columns are ``<chrom>_rc``, ``length - stop`` and ``length - start``.

    Raises:
        KeyError: If the bed file names a contig absent from the reference.
    """
    parser = argparse.ArgumentParser(
        prog='reverse_bed',
        description='Convert bed-file coordinates to coordinates on the reverse strand.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('bed_in', help='Input bed file.')
    parser.add_argument('ref_fasta', help='Input reference fasta file.')
    parser.add_argument('bed_out', help='Output bed file.')
    args = parser.parse_args()

    # Only the contig lengths are needed, so close the reference right away
    # instead of leaving the handle open for the rest of the run.
    fasta = pysam.FastaFile(args.ref_fasta)
    try:
        lengths = dict(zip(fasta.references, fasta.lengths))
    finally:
        fasta.close()

    d = pd.read_csv(args.bed_in, sep='\t', names=['chrom', 'start', 'stop'])

    d['chrom_length'] = d['chrom'].map(lambda x: lengths[x])
    # Mirroring an interval swaps the roles of start and stop.
    d['rc_stop'] = d['chrom_length'] - d['start']
    d['rc_start'] = d['chrom_length'] - d['stop']
    d['chrom_rc'] = d['chrom'] + '_rc'
    d[['chrom_rc', 'rc_start', 'rc_stop']].to_csv(args.bed_out, index=False, header=False, sep='\t')
开发者ID:nanoporetech,项目名称:pomoxis,代码行数:23,代码来源:util.py

示例4: get_gc_content

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def get_gc_content(regions, fasta):
	"""Get the GC content of the given regions of a fasta file.

	Each G/C (either case) counts as 1, each A/T (either case) as 0 and any
	other character (e.g. N) as 0.5. The sum is divided by the total region
	length computed from ``region.end - region.start``.

	Args:
		regions: Iterable of objects exposing ``chrom``, ``start`` and ``end``.
		fasta: Path to the fasta file, opened with ``pysam.FastaFile``.

	Returns:
		The GC fraction as a float.

	Raises:
		ZeroDivisionError: If ``regions`` is empty or the total length is 0.
	"""
	nuc_count = {"T":0, "t":0, "A":0, "a":0, "G":1, "g":1, "C":1, "c":1}

	gc = 0
	total = 0
	fasta_obj = pysam.FastaFile(fasta)
	try:
		for region in regions:
			seq = fasta_obj.fetch(region.chrom, region.start, region.end)
			gc += sum(nuc_count.get(nuc, 0.5) for nuc in seq)
			total += region.end - region.start
	finally:
		# Close the handle even if a fetch fails part-way through.
		fasta_obj.close()
	gc_content = gc / float(total)

	return gc_content


#---------------------------------------------------------------------------------------------------------#
#------------------------------------------- Main functions ----------------------------------------------#
#---------------------------------------------------------------------------------------------------------# 
开发者ID:loosolab,项目名称:TOBIAS,代码行数:22,代码来源:bindetect_functions.py

示例5: test_neg_in_serialise

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def test_neg_in_serialise(self):
        """A minus-strand Prodigal CDS line must produce a BED12 that is not invalid."""
        gff_text = "tr_c114_g1_i1.mrna1.89\tProdigal_v2.6.3\tCDS\t2\t205\t28.7\t-\t0\t\
ID=85_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.407;\
conf=99.86;score=28.71;cscore=27.10;sscore=1.61;rscore=0.00;uscore=0.00;tscore=1.61;"
        record = GFF.GffLine(gff_text)
        self.assertFalse(record.header)
        self.assertIsNotNone(record.id)

        log = create_default_logger("test_neg_in_serialise", "DEBUG")
        fasta_path = pkg_resources.resource_filename("Mikado.tests", "mikado_prepared.fasta")
        fasta_index = pysam.FastaFile(fasta_path)

        # Build the BED12 in transcriptomic mode against the packaged fasta.
        converted = bed12.BED12(
            record,
            logger=log,
            max_regression=0.1,
            start_adjustment=True,
            fasta_index=fasta_index,
            transcriptomic=True,
        )
        self.assertFalse(converted.invalid, converted.invalid_reason)
开发者ID:EI-CoreBioinformatics,项目名称:mikado,代码行数:20,代码来源:orf_test.py

示例6: __init__

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def __init__(self, fasta_file):
        """Open ``fasta_file`` with ``pysam.FastaFile`` and keep the handle on ``self.f``."""
        handle = pysam.FastaFile(fasta_file)
        self.f = handle
开发者ID:kipoi,项目名称:models,代码行数:4,代码来源:fasta_utils.py

示例7: fasta

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def fasta(self):
        """Return the ``pysam.FastaFile`` for ``self.path``, opening it on first use.

        The handle is cached on ``self._fasta``; later calls return the
        cached object without reopening the file.
        """
        if self._fasta is not None:
            return self._fasta

        # pysam is imported lazily, only when a file actually has to be opened.
        import pysam
        self._fasta = pysam.FastaFile(self.path)
        return self._fasta
开发者ID:nspies,项目名称:genomeview,代码行数:9,代码来源:genomesource.py

示例8: __init__

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def __init__(self, referenceSetId, fastaFile):
        """Initialise the base class, then open ``fastaFile`` with pysam.

        The opened handle is stored on ``self._fastaFile``.
        """
        super(ReferenceSetTest, self).__init__(referenceSetId, fastaFile)
        fastaHandle = pysam.FastaFile(fastaFile)
        self._fastaFile = fastaHandle
开发者ID:ga4gh,项目名称:ga4gh-server,代码行数:5,代码来源:test_references.py

示例9: openFile

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def openFile(self, dataFile):
        """Return a ``pysam.FastaFile`` opened on ``dataFile``."""
        fastaHandle = pysam.FastaFile(dataFile)
        return fastaHandle
开发者ID:ga4gh,项目名称:ga4gh-server,代码行数:4,代码来源:references.py

示例10: __init__

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def __init__(self, datafile, use_strand=False, **kwargs):
        """Create a fasta file extractor.

        NOTE: The extractor is not thread-safe.
        If you wish to use it with multiprocessing,
        create a new extractor object in each process.

        Args:
          datafile (str): path to the fasta file
          use_strand (bool): if True, the extracted sequence
            is reverse complemented in case interval.strand == "-"
          **kwargs: forwarded unchanged to the parent constructor
        """
        super(FastaExtractor, self).__init__(datafile, **kwargs)
        self.use_strand = use_strand
        # self._datafile is read back from the instance -- presumably set by
        # the parent constructor (TODO confirm).
        fasta_handle = FastaFile(self._datafile)
        self.fasta = fasta_handle
开发者ID:kundajelab,项目名称:genomelake,代码行数:17,代码来源:extractors.py

示例11: read_chrom_sizes_from_fasta

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def read_chrom_sizes_from_fasta(fastafile):
    """Get chromosome size information from a fasta file.

    Args:
        fastafile: Path to the fasta file, opened with ``pysam.FastaFile``.

    Returns:
        dict mapping each reference name to its length as an ``int``.
    """
    fasta = pysam.FastaFile(fastafile)
    try:
        # Pair names with lengths directly instead of indexing both by position.
        return {
            name: int(length)
            for name, length in zip(fasta.references, fasta.lengths)
        }
    finally:
        fasta.close()
开发者ID:GreenleafLab,项目名称:NucleoATAC,代码行数:12,代码来源:utils.py

示例12: get_sequence

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def get_sequence(chunk, fastafile):
    """Obtain the upper-cased sequence for an interval.

    Args:
        chunk: Object for which the sequence is to be fetched; its
            ``chrom``, ``start``, ``end`` and ``strand`` attributes are read.
        fastafile: Filename of the fasta file holding the sequence.

    Returns:
        The fetched sequence in upper case, passed through
        ``reverse_complement`` first when ``chunk.strand == "-"``.
    """
    handle = pysam.FastaFile(fastafile)
    try:
        sequence = handle.fetch(chunk.chrom, chunk.start, chunk.end)
    finally:
        # Release the handle even when the fetch fails.
        handle.close()
    if chunk.strand == "-":
        sequence = reverse_complement(sequence)
    return sequence.upper()
开发者ID:GreenleafLab,项目名称:NucleoATAC,代码行数:14,代码来源:seq.py

示例13: getNucFreqsFromChunkList

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def getNucFreqsFromChunkList(chunks, fasta, nucleotides):
    """Get nucleotide frequencies within regions of a genome.

    Args:
        chunks: Iterable of objects exposing ``chrom``, ``start`` and ``end``.
        fasta: Path to the fasta file, opened with ``pysam.FastaFile``.
        nucleotides: Sequence of strings to count. Fetched sequences are
            upper-cased before counting, so lower-case entries never match.

    Returns:
        numpy array with one entry per element of ``nucleotides``: its total
        count divided by the total fetched sequence length.
    """
    out = np.zeros(len(nucleotides))
    n = 0.0
    handle = pysam.FastaFile(fasta)
    try:
        for chunk in chunks:
            sequence = handle.fetch(chunk.chrom, chunk.start, chunk.end).upper()
            out += [sequence.count(i) for i in nucleotides]
            n += len(sequence)
    finally:
        # Close the handle even if a fetch fails part-way through.
        handle.close()
    return out/n
开发者ID:GreenleafLab,项目名称:NucleoATAC,代码行数:14,代码来源:seq.py

示例14: __init__

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def __init__(self, filename):
        """Open ``filename`` with ``FastaFile`` and keep the handle on ``self._fh``."""
        handle = FastaFile(filename)
        self._fh = handle
开发者ID:biocommons,项目名称:biocommons.seqrepo,代码行数:4,代码来源:fabgz.py

示例15: close

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def close(self):
        """Finish writing: compress, index and write-protect the output file.

        Does nothing when ``self._fh`` is already unset. Otherwise the handle
        is closed, ``self._basepath`` is compressed with the configured bgzip
        executable, the resulting ``.gz`` file is renamed to
        ``self.filename``, and the file plus its ``.fai`` and ``.gzi``
        companions are made read-only.
        """
        if not self._fh:
            return

        self._fh.close()
        self._fh = None
        subprocess.check_call([self._bgzip_exe, "--force", self._basepath])
        os.rename(self._basepath + ".gz", self.filename)

        # Open the file once with FastaFile so that the indexes get created.
        index_builder = FastaFile(self.filename)
        index_builder.close()

        # Read permission for user, group and others; no write bits.
        read_only = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
        os.chmod(self.filename, read_only)
        for index_suffix in (".fai", ".gzi"):
            os.chmod(self.filename + index_suffix, read_only)

        _logger.info("{} written; added {} sequences".format(self.filename, len(self._added)))
开发者ID:biocommons,项目名称:biocommons.seqrepo,代码行数:17,代码来源:fabgz.py


注:本文中的pysam.FastaFile方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。