本文整理汇总了 Python 中 pysnptools.snpreader.Bed.read 方法的典型用法代码示例。如果您正苦于以下问题:Python Bed.read 方法的具体用法?Python Bed.read 怎么用?Python Bed.read 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pysnptools.snpreader.Bed 的用法示例。
在下文中一共展示了Bed.read方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: setUpClass
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import read [as 别名]
def setUpClass(self):
    """Read the toy Bed dataset once and keep the results on ``self``.

    Sets ``currentFolder`` (directory of this file), ``pheno_fn`` (path of
    the toy phenotype file), ``snpdata`` (the data read from
    ``examples/toydata``) and ``snps`` (its ``val`` matrix).
    """
    here = os.path.dirname(os.path.realpath(__file__))
    self.currentFolder = here
    self.pheno_fn = here + "/examples/toydata.phe"

    # TODO: get data set with NANs!
    bed_reader = Bed(here + "/examples/toydata", count_A1=False)

    # Fortran-ordered read with force_python_only switched on.
    loaded = bed_reader.read(order='F', force_python_only=True)
    self.snpdata = loaded
    self.snps = loaded.val
示例2: setUpClass
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import read [as 别名]
def setUpClass(self):
    """Prepare shared fixtures from the mouse dataset.

    Sets the ``snp_fn`` / ``pheno_fn`` paths, the unit-standardized value
    matrix ``G``, the first phenotype column ``y`` and an all-ones
    single-column covariate ``G_cov``. All three arrays are flagged
    read-only.
    """
    base_dir = os.path.dirname(os.path.realpath(__file__))
    self.snp_fn = base_dir + "/../../tests/datasets/mouse/alldata"
    self.pheno_fn = base_dir + "/../../tests/datasets/mouse/pheno_10_causals.txt"

    # Open the reader and the phenotype, then intersect their sample ids.
    bed_reader, phenotype = pysnptools.util.intersect_apply(
        [Bed(self.snp_fn), pstpheno.loadOnePhen(self.pheno_fn)])

    raw_values = bed_reader.read(order='C').val
    self.G = stdizer.Unit().standardize(raw_values)
    self.y = phenotype['vals'][:, 0]
    # No covariate file is loaded; a column of ones stands in for it.
    self.G_cov = np.ones((len(self.y), 1))

    for fixture in (self.G, self.y, self.G_cov):
        fixture.flags.writeable = False
示例3: read_plink
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import read [as 别名]
def read_plink(self, fn_plink = None):
    """
    Read a PLINK Bed file and store its contents on the instance.

    ``fn_plink`` is handed unchanged to ``Bed``. Afterwards ``GT`` holds the
    value matrix, ``POS`` the first two columns of ``pos``, ``SID`` the
    second column of ``iid``, and ``isNormalised`` is reset to False.
    """
    loaded = Bed(fn_plink).read()
    self.GT = loaded.val
    self.POS = loaded.pos[:, [0, 1]]
    self.SID = loaded.iid[:, 1]
    # Freshly read values have not been normalised yet.
    self.isNormalised = False
示例4: factory
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import read [as 别名]
def factory(snpreader, num_snps_in_memory, standardizer, blocksize):
    """Return an ``InMemory`` or ``FromDisk`` wrapper around ``snpreader``.

    A ``str`` argument is first wrapped in ``Bed``. When
    ``num_snps_in_memory`` is at least the reader's ``sid_count`` the data
    is read, standardized and wrapped in ``InMemory`` with a read-only value
    array; otherwise a ``FromDisk`` instance is returned.
    """
    reader = Bed(snpreader) if isinstance(snpreader, str) else snpreader

    fits_in_memory = num_snps_in_memory >= reader.sid_count
    if not fits_in_memory:
        return FromDisk(reader, num_snps_in_memory, standardizer, blocksize, None)

    standardized = reader.read(order='C').standardize(standardizer)
    cached = InMemory(standardized, standardizer, blocksize)
    # Freeze the values and expose the same array through ``_val``.
    cached._snpreader.val.flags.writeable = False
    cached._val = cached._snpreader.val
    return cached
示例5: process_data
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import read [as 别名]
def process_data(input_path, output_path, name):
    """Preprocess one Bed dataset and save the result through ``h5_save``.

    The dataset ``name`` is read from ``input_path``, its values are passed
    to ``pysnp_genpreproc`` and the result is saved as ``<name>.h5py`` under
    ``output_path``.

    Returns a dict with the subject/SNP counts and ids plus the ``path`` and
    ``key`` reported by ``h5_save``.
    """
    snp_data = Bed(os.path.join(input_path, name)).read()
    preproc_vals = pysnp_genpreproc(snp_data.val)

    # The preprocessed values must not contain any NaN.
    assert not np.any(np.isnan(preproc_vals))

    target = os.path.join(output_path, name + ".h5py")
    path, keys = h5_save(path=target, data_obj={name: preproc_vals}, dt='f')

    return {
        'n_subjects': snp_data.iid_count,
        'subject_ids': snp_data.iid,
        'n_snps': snp_data.sid_count,
        'snp_ids': snp_data.sid,
        'data_preprocessed_location': {'path': path, 'key': keys},
    }
示例6: test_write_x_x_cpp
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import read [as 别名]
def test_write_x_x_cpp(self):
    """Round-trip the toy data through ``Bed.write`` for every order/dtype pair."""
    source = Bed(self.currentFolder + "/examples/toydata")
    suffix_for = {np.float32: "32", np.float64: "64"}
    combos = [(order, dtype)
              for order in ['C', 'F']
              for dtype in [np.float32, np.float64]]

    for order, dtype in combos:
        expected = source.read(order=order, dtype=dtype)
        # Put a NaN into the last row, first column before writing.
        expected.val[-1, 0] = float("NAN")
        output = "tempdir/toydata.{0}{1}.cpp".format(order, suffix_for[dtype])
        create_directory_if_necessary(output)
        # NOTE(review): (snpdata, filename) argument order; newer pysnptools
        # releases appear to expect (filename, snpdata) - confirm against the
        # installed version.
        Bed.write(expected, output)
        reread = Bed(output).read()
        assert TestLoader.is_same(expected, reread)  # !!!define an equality method on snpdata?
示例7: test_write_x_x_cpp
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import read [as 别名]
def test_write_x_x_cpp(self):
    """Write the toy data with ``Bed.write`` and check the values read back.

    Runs once per combination of memory order ('C', 'F') and dtype
    (float32, float64), after placing a NaN in the last row, first column.
    """
    toy_reader = Bed(self.currentFolder + "/examples/toydata")
    cases = [(order, dtype)
             for order in ['C', 'F']
             for dtype in [np.float32, np.float64]]

    for order, dtype in cases:
        written = toy_reader.read(order=order, dtype=dtype)
        written.val[-1, 0] = float("NAN")

        if dtype == np.float32:
            bits = "32"
        else:
            bits = "64"
        output = "tempdir/toydata.{0}{1}.cpp".format(order, bits)

        create_directory_if_necessary(output)
        Bed.write(output, written)

        reloaded = Bed(output).read()
        np.testing.assert_array_almost_equal(written.val, reloaded.val, decimal=10)
示例8: test_subset_view
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import read [as 别名]
def test_subset_view(self):
    """Check when ``read`` copies the values and when it may share them.

    A plain ``read()`` is expected to return values that do not share
    memory with their source, while ``read(view_ok=True)`` on in-memory data
    is expected to share memory with it.
    """
    # Call numpy directly: the original used ``sp.may_share_memory``
    # (assuming ``sp`` is scipy), which relies on NumPy functions re-exported
    # from the root scipy namespace - deprecated since SciPy 1.4.
    import numpy as np

    snpreader2 = Bed(self.currentFolder + "/examples/toydata", count_A1=False)[:, :]

    result = snpreader2.read(view_ok=True)
    self.assertIsNot(snpreader2, result)

    # Subset + plain read: values must be copied.
    result2 = result[:, :].read()
    self.assertFalse(np.may_share_memory(result2.val, result.val))

    # Subset + view_ok read: values may be shared.
    result3 = result[:, :].read(view_ok=True)
    self.assertTrue(np.may_share_memory(result3.val, result.val))

    # Reading in-memory data directly follows the same rules.
    result4 = result3.read()
    self.assertFalse(np.may_share_memory(result4.val, result3.val))

    result5 = result4.read(view_ok=True)
    self.assertTrue(np.may_share_memory(result4.val, result5.val))
示例9: main
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import read [as 别名]
def main(args):
    """Compute a segment length and an IBD flag for every seed SNP.

    Parameters
    ----------
    args : object with the attributes
        seed_snps  -- header-less file read into a single 'SNP' column that
                      becomes the index of the result table
        typed_snps -- header-less file read into a single 'SNP' column
        bfile      -- path handed to ``Bed``
        outfile    -- destination of the tab-separated result table

    Side effects: prints progress information, prints the first 100 result
    rows and writes ``args.outfile``. Returns None.
    """
    print('reading seeed snps')
    seed_snps = pd.read_csv(args.seed_snps, header=None, names=['SNP'], index_col='SNP')
    seed_snps['ibs_length'] = 0
    seed_snps['ibd'] = 0

    print('reading typed snps')
    typed_snps = pd.read_csv(args.typed_snps, header=None, names=['SNP'])

    print('reading genotypes')
    data = Bed(args.bfile)
    X = data.read().val
    typed_snps_indices = np.sort(data.sid_to_index(typed_snps.SNP))
    typed_snps_bp = data.col_property[typed_snps_indices, 2]

    print(len(seed_snps), 'snps in list')
    print(data.iid_count, data.sid_count, 'are dimensions of X')

    def analyze_snp(i):
        """Return ``(cM, ibd)`` for the SNP stored in column ``i`` of ``X``."""
        # find first typed snp after query snp; fall back to the last typed
        # snp when none has a larger value in column 2 of col_property
        snp_bp = data.col_property[i, 2]
        later = np.where(typed_snps_bp > snp_bp)[0]
        if len(later) > 0:
            typed_i = later[0]
        else:
            typed_i = len(typed_snps_indices) - 1

        # The unpacking needs exactly two rows with value 1 in this column;
        # any other count raises ValueError.
        n1, n2 = np.where(X[:, i] == 1)[0]

        # The two rows differ by 2 at the anchor typed SNP: report nothing.
        anchor = typed_snps_indices[typed_i]
        if (X[n1, anchor] - X[n2, anchor])**2 == 4:
            return 0, 0

        # NOTE(review): find_boundaries presumably returns left/right limits
        # in typed-SNP coordinates around typed_i - confirm against fis.
        typed_il, typed_ir = fis.find_boundaries(
            X[n1, typed_snps_indices],
            X[n2, typed_snps_indices],
            typed_i)
        typed_ir -= 1

        il = typed_snps_indices[typed_il]
        ir = typed_snps_indices[typed_ir]
        # Difference of column 1 of col_property between the two limits.
        cM = data.col_property[ir, 1] - data.col_property[il, 1]
        # Flag the pair when more than 99% of the values in [il, ir) agree.
        ibd = (np.mean(X[n1, il:ir] == X[n2, il:ir]) > 0.99)
        return cM, int(ibd)

    # Builtin zip replaces the Python-2-only itertools.izip, and .loc
    # replaces DataFrame.ix, which was removed in pandas 1.0.
    seed_indices = data.sid_to_index(seed_snps.index)
    for (i, snp) in iter.show_progress(
            zip(seed_indices, seed_snps.index),
            total=len(seed_snps)):
        seed_snps.loc[snp, ['ibs_length', 'ibd']] = analyze_snp(i)

    print(seed_snps.iloc[:100])
    seed_snps.to_csv(args.outfile, sep='\t')
示例10: main
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import read [as 别名]
def main():
    """
    example that compares output to fastlmmc

    Loads the 5-chromosome toy data, runs ``WindowingGwas`` and ``GwasTest``
    on it, plots the log p-values of both against each other, asserts that
    they agree to 3 decimals and finally draws a Manhattan plot.
    Takes no arguments and returns None; ``pylab.show()`` blocks until the
    plot window is closed.
    """
    # set up data
    phen_fn = "../feature_selection/examples/toydata.phe"
    snp_fn = "../feature_selection/examples/toydata.5chrom"

    # load data and intersect the sample ids (no covariates are used)
    snp_reader = Bed(snp_fn)
    pheno = pstpheno.loadOnePhen(phen_fn)
    cov = None
    snp_reader, pheno, cov = intersect_apply([snp_reader, pheno, cov])

    G = snp_reader.read(order='C').val
    G = stdizer.Unit().standardize(G)
    G.flags.writeable = False

    y = pheno['vals'][:, 0]
    # The original line was the bare expression ``y.flags.writeable``, which
    # does nothing; make y read-only like G.
    y.flags.writeable = False

    delta = 2.0
    gwas = WindowingGwas(G, y, delta=delta)
    pv = gwas.run_gwas()

    # Imported here, as in the original, so it is only needed when main() runs.
    from fastlmm.association.tests.test_gwas import GwasTest

    REML = False
    snp_pos_sim = snp_reader.sid
    snp_pos_test = snp_reader.sid

    os.environ["FastLmmUseAnyMklLib"] = "1"
    gwas_c = GwasTest(snp_fn, phen_fn, snp_pos_sim, snp_pos_test, delta, REML=REML, excludeByPosition=0)
    gwas_c.run_gwas()

    import pylab
    # Scatter of the two sets of log p-values plus the identity line.
    pylab.plot(np.log(pv), np.log(gwas_c.p_values), "+")
    pylab.plot(np.arange(-18, 0), np.arange(-18, 0), "-k")
    pylab.show()

    np.testing.assert_array_almost_equal(np.log(pv), np.log(gwas_c.p_values), decimal=3)

    simple_manhattan_plot(pv)
示例11: test_SNC
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import read [as 别名]
def test_SNC(self):
    """Run ``single_snp`` on data with one constant SNP and compare to 'snc'."""
    logging.info("TestSNC")

    bed_base = self.bedbase
    phenotype = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
    covariates = pstpheno.loadPhen(self.cov_fn)

    snc = Bed(bed_base, count_A1=False).read()
    # make SNP #2 have constant values (aka a SNC)
    zero_column = [0] * snc.iid_count
    snc.val[:, 2] = zero_column

    out_name = self.file_name("snc")
    options = dict(
        test_snps=snc[:, :10],
        pheno=phenotype,
        G0=snc,
        mixing=0,
        leave_out_one_chrom=False,
        covar=covariates,
        output_file_name=out_name,
        count_A1=False,
    )
    frame = single_snp(**options)

    self.compare_files(frame, "snc")
示例12: load_plink_bed_bim_fam_dataset
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import read [as 别名]
def load_plink_bed_bim_fam_dataset(path_dataset, snp_ids=None,
                                   subject_ids=None, count_A1=True):
    """
    Load a Plink bed/bim/fam dataset as a SnpData instance. Optionally, a
    specific list of snps or subjects can be extracted to avoid loading
    everything in memory.

    Parameters
    ----------
    path_dataset: str
        Path to the Plink bed/bim/fam dataset, with or without .bed extension.
    snp_ids: list/set of str, default None
        Snps that should be extracted if available in the dataset.
        By default None, all snps are loaded.
    subject_ids: list of str, default None
        Subjects that should be extracted if available in the dataset
        (matched against the second column of ``iid``).
        By default None, all subjects are loaded.
    count_A1: bool, default True
        Genotypes are provided as allele counts, A1 if True else A2.

    Return
    ------
    snp_data: pysnptools object
        PLINK data loaded by the 'pysnptools' library.
    """
    # Creating the reader loads the metadata only, not the genotypes.
    reader = Bed(path_dataset, count_A1=count_A1)

    # Optional SNP filter: one boolean per sid, applied on the column axis.
    if snp_ids is not None:
        wanted_snps = set(snp_ids)
        keep_snp = [sid in wanted_snps for sid in reader.sid]
        reader = reader[:, keep_snp]

    # Optional subject filter: one boolean per row, applied on the row axis.
    if subject_ids is not None:
        wanted_subjects = set(subject_ids)
        keep_subject = [iid in wanted_subjects for iid in reader.iid[:, 1]]
        reader = reader[keep_subject, :]

    # Only now are the genotypes read from the Plink dataset.
    return reader.read()
示例13: cluster_data
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import read [as 别名]
def cluster_data(snpreader):
    """
    compute and plot a hierarchical clustering of the rows of a snp data set

    ``snpreader`` is either a reader object or a ``str``, in which case it is
    wrapped in ``Bed``. The figure shows a dendrogram on the left, the same
    dendrogram on top, and the pairwise Euclidean distance matrix reordered
    by the leaves of the left dendrogram. Returns None; ``pylab.show()``
    displays the figure.
    """
    reader = Bed(snpreader) if isinstance(snpreader, str) else snpreader
    G = reader.read().standardize().val

    # Generate distance matrix
    from sklearn.metrics.pairwise import euclidean_distances
    distances = euclidean_distances(G, G)

    def _hide_ticks(axes):
        # Remove tick marks on both axes of ``axes``.
        axes.set_xticks([])
        axes.set_yticks([])

    fig = pylab.figure(figsize=(8,8))

    # Left dendrogram; its leaf order drives the matrix reordering below.
    left_axes = fig.add_axes([0.09,0.1,0.2,0.6])
    linkage = fc.linkage(distances, method='average') #method="centroid" is cubic!
    left_tree = sch.dendrogram(linkage, orientation='right')
    _hide_ticks(left_axes)

    # Top dendrogram, drawn from the same linkage.
    top_axes = fig.add_axes([0.3,0.71,0.6,0.2])
    sch.dendrogram(linkage)
    _hide_ticks(top_axes)

    # Distance matrix with rows and columns permuted by the left leaves.
    matrix_axes = fig.add_axes([0.3,0.1,0.6,0.6])
    leaf_order = left_tree['leaves']
    reordered = distances[leaf_order,:][:,leaf_order]
    matrix_axes.matshow(reordered, aspect='auto', origin='lower', cmap=pylab.cm.YlGnBu)
    _hide_ticks(matrix_axes)

    pylab.show()
示例14: genPheno
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import read [as 别名]
def genPheno(filename="../thinFam",per=.5,savename="fakePheno.txt",c=2.0,num=5):
    """Pick ``num`` random SNP columns from a Bed file and clean their values.

    Parameters
    ----------
    filename : path handed to ``Bed``.
    per, savename, c : accepted but not used by the code visible here.
    num : number of random column indices to draw.

    Prints diagnostics along the way and, at the end, the distinct values of
    every selected column. Returns None.
    """
    D = Bed(filename).read().val
    m = len(D[0])  # number of columns
    n = len(D)     # number of rows
    # Single-argument print(...) calls print the same on Python 2 and 3;
    # the original print statements do not parse on Python 3.
    print(m)
    print(n)

    # NOTE(review): the upper bound m-1 is only right if rand.randint is
    # inclusive (stdlib random); numpy.random.randint excludes it - confirm.
    I = [rand.randint(0, m - 1) for i in range(0, num)]
    # One plain Python list of n values per selected column.
    SNP = [[D[j][i] for j in range(0, n)] for i in I]

    print(len(I))
    print(len(SNP))
    print(len(SNP[0]))
    print(n)
    print(min([len(s) for s in SNP]))
    print(SNP)

    # Raise negative values to 0.0, then replace anything that is still not
    # one of 0.0 / 1.0 / 2.0 (e.g. NaN) with 0.0.
    allowed = [1.0, 0.0, 2.0]
    SNP = [[max(v, 0.0) for v in s] for s in SNP]
    SNP = [[v if v in allowed else 0.0 for v in s] for s in SNP]

    print([list(set(s)) for s in SNP])
示例15: SnpData
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import read [as 别名]
# Find out about iids and sids: print both counts and the first three ids.
# `snpreader` is not created in this fragment; it must already exist.
print snpreader.iid_count
print snpreader.sid_count
print snpreader.iid[:3]
print snpreader.sid[:3]
# Sample output recorded with the example:
#500
#5000
#[['cid0P0' 'cid0P0']
# ['cid1P0' 'cid1P0']
# ['cid2P0' 'cid2P0']]
#['snp625_m0_.03m1_.07' 'snp1750_m0_.02m1_.04' 'snp0_m0_.37m1_.24']
# Read all the SNP data into memory
snpdata = snpreader.read()
# What is snpdata?
# SnpData(Bed("all.bed"))
# What do the iids and sids of snpdata look like?
print snpdata.iid_count, snpdata.sid_count
print snpdata.iid[:3]
print snpdata.sid[:3]
# The same as for snpreader above.
# Print the SNP values themselves
print snpdata.val
# Sample output recorded with the example (cut off in this fragment):
#[[ 2. 2. 1. ..., 2. 1. 2.]
# [ 2. 2. 1. ..., 2. 0. 2.]
# [ 2. 2. 1. ..., 1. 1. 1.]
# ...,