This page collects typical usage examples of the Python scipy.corrcoef function. If you are unsure what corrcoef does or how to call it, the curated examples below should help.
The following presents 15 code examples of the corrcoef function, ordered by popularity by default.
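Before the project-level examples, here is a minimal, self-contained sketch of what corrcoef computes. Note that scipy.corrcoef was simply the NumPy corrcoef re-exported at SciPy's top level; recent SciPy releases removed that alias, so numpy.corrcoef is the drop-in equivalent used in this sketch.

import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
y = np.array([2.1, 3.9, 6.2, 8.1, 9.8])

r_matrix = np.corrcoef(x, y)  # 2x2 symmetric matrix with ones on the diagonal
r = r_matrix[0, 1]            # the off-diagonal entry is the Pearson correlation
print(r_matrix)
print(r)                      # close to 1.0 for these nearly linear data

Most of the examples below follow exactly this pattern: call corrcoef on two 1-D arrays and keep the [0, 1] entry.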
Example 1: alignAndCompareMotifs
def alignAndCompareMotifs(motif1, motif2, reportAll=False, tryAllAlignments=True, reverseComp=True, quitThreshold=None, normalizeRows=True, fillValue=.25):
    """ Compare the PWMs for two motifs by calculating their correlation coefficient.
    By default, all possible alignments and orientations are tried and the top coefficient is reported.
    fillValue may be a number, or a 4-element array of nucleotide frequencies.
    Returns (corrCoef, motif2_relative_posn, motif2_orientation) from the best alignment, or the entire list if reportAll=True.
    """
    pwm1, pwm2 = motif1.matrix, motif2.matrix
    if normalizeRows:  # make the sum in each row = 1
        pwm1, pwm2 = map(normalizePwmRows, [pwm1, pwm2])
    alignsToTry = xrange(-len(motif2) + 1, len(motif1) - 1) if tryAllAlignments else [0]  # all possible shifts or no shifting
    results = []
    for curOffset in alignsToTry:
        curPwm1, curPwm2 = map(scipy.array, extendPWMs(pwm1, pwm2, curOffset, fillValue))
        # flatten arrays and take the 1-dimensional correlation between them
        corrCoef = scipy.corrcoef(curPwm1.ravel(), curPwm2.ravel())[0, 1]  # top-right entry is the correlation between the matrices
        results.append([corrCoef, curOffset, 1])
        if quitThreshold is not None and corrCoef > quitThreshold:
            # return immediately if the quit threshold has been passed
            break
        if reverseComp:
            curPwm2 = scipy.array(reverseComplement(curPwm2))
            corrCoef = scipy.corrcoef(curPwm1.ravel(), curPwm2.ravel())[0, 1]  # top-right entry is the correlation between the matrices
            results.append([corrCoef, curOffset, -1])
            if quitThreshold is not None and corrCoef > quitThreshold:
                # return immediately if the quit threshold has been passed
                break
    if not reportAll:
        results = scipy.array(results)
        best = results[results[:, 0].argmax(), :]  # choose the result (row) with the best corrCoef
        return best
    else:
        return results
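The heart of the comparison is a single Pearson correlation over the flattened position weight matrices. Below is a stand-alone sketch of just that step, using two hypothetical 4x4 PWMs (rows = positions, columns = A/C/G/T frequencies), not data from the original project.

import numpy as np

pwm_a = np.array([[0.70, 0.10, 0.10, 0.10],
                  [0.10, 0.70, 0.10, 0.10],
                  [0.10, 0.10, 0.70, 0.10],
                  [0.25, 0.25, 0.25, 0.25]])
pwm_b = np.array([[0.60, 0.20, 0.10, 0.10],
                  [0.10, 0.60, 0.20, 0.10],
                  [0.10, 0.10, 0.60, 0.20],
                  [0.25, 0.25, 0.25, 0.25]])

# flatten both matrices and correlate, as in the loop body above
similarity = np.corrcoef(pwm_a.ravel(), pwm_b.ravel())[0, 1]
print(similarity)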
Example 2: Corr
def Corr(GDP, I, C):
    m = sp.shape(GDP)[1]
    GDPIcorr = []
    GDPCcorr = []
    for i in range(0, m):
        gdp = GDP[:, i]
        inv = I[:, i]
        con = C[:, i]
        # Correlation between output and investment for each series
        gdpi = sp.corrcoef(gdp, inv)
        GDPIcorr.append(gdpi[0, 1])
        # Correlation between output and consumption for each series
        gdpc = sp.corrcoef(gdp, con)
        GDPCcorr.append(gdpc[0, 1])
    # Mean and standard deviation of the correlation between GDP and
    # Investment and Consumption over the total number of simulations
    GDPICORR = sp.array(GDPIcorr)
    gdpimean = sp.mean(GDPICORR)
    gdpistdev = sp.std(GDPICORR)
    GDPCCORR = sp.array(GDPCcorr)
    gdpcmean = sp.mean(GDPCCORR)
    gdpcstdev = sp.std(GDPCCORR)
    sp.savetxt('GDPICORR.csv', GDPICORR)
    sp.savetxt('GDPCCORR.csv', GDPCCORR)
    print "The mean and standard deviation between GDP and"
    print "Investment and GDP and Consumption followed by"
    print "the lists of each correlation coefficient for"
    print "each series are saved in csv files"
    return gdpimean, gdpistdev, gdpcmean, gdpcstdev
Example 3: weighted_average_aligned_runs
def weighted_average_aligned_runs(self, sources, mixing):
    '''
    Averages one aligned ICA run and calculates the reproducibility for each component. This version does not
    restrict the reproducibility index to super-threshold cross-correlations, and it uses a weighted average to form the
    average components. The weights are defined as w_i = sum_{j neq i} SCC(i,j).
    '''
    rep = np.triu(np.abs(corrcoef(sources)), 1).sum() / (0.5 * self.K * (self.K - 1))
    rWeights = np.asarray([(np.abs(corrcoef(sources)[j, :]).sum() - 1.0) / (sources.shape[0] - 1) for j in range(0, sources.shape[0])])[:, np.newaxis]
    return ((rWeights * sources).sum(axis=0)) / (rWeights.sum()), ((mixing * rWeights.T).sum(axis=1)) / (rWeights.sum()), rep
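The reproducibility index rep is the mean absolute cross-correlation over the K*(K-1)/2 pairs of estimated sources. A minimal sketch of that computation on a random, purely illustrative source matrix (rows = components, columns = samples):

import numpy as np

sources = np.random.default_rng(0).normal(size=(4, 200))  # 4 hypothetical components
K = sources.shape[0]
cc = np.abs(np.corrcoef(sources))                 # K x K absolute cross-correlation matrix
rep = np.triu(cc, 1).sum() / (0.5 * K * (K - 1))  # average over the upper triangle, diagonal excluded
print(rep)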
Example 4: word_party_correlations
def word_party_correlations(folder='model'):
    stopwords = codecs.open("stopwords.txt", "r", "utf-8").readlines()[5:]
    stops = map(lambda x: x.lower().strip(), stopwords)
    # using stopwords now and filtering out digits
    bow = TfidfVectorizer(min_df=2)
    datafn = folder + '/textdata/rawtext.pickle'
    data = cPickle.load(open(datafn))
    bow = bow.fit(chain.from_iterable(data.values()))
    # create numerical labels
    Y = hstack(map((lambda x: ones(len(data[data.keys()[x]])) * x), range(len(data))))
    # create data matrix
    for key in data.keys():
        data[key] = bow.transform(data[key])
    X = vstack(data.values())
    # map sentiment vector to bow space
    words = load_sentiment()
    sentiment_vec = zeros(X.shape[1])
    for key in words.keys():
        if bow.vocabulary_.has_key(key):
            sentiment_vec[bow.vocabulary_[key]] = words[key]
    # do sentiment analysis
    sentiments = X.dot(sentiment_vec)
    # compute label-BoW-tfidf-feature correlation
    lb = LabelBinarizer()
    partylabels = zscore(lb.fit_transform(Y), axis=0)
    # sentiment vs party correlation
    sentVsParty = corrcoef(partylabels.T, sentiments)[-1, :-1]
    fn = folder + '/sentiment_vs_party.json'
    for key in range(len(data.keys())):
        print "Sentiment vs Party %s: %0.2f" % (data.keys()[key], sentVsParty[key])
    json.dump(dict(zip(data.keys(), sentVsParty)), open(fn, 'wb'))
    wordidx2word = dict(zip(bow.vocabulary_.values(), bow.vocabulary_.keys()))
    allcors = dict(zip(data.keys(), [[]] * len(data.keys())))
    # this is extremely cumbersome and slow,
    # but computing the correlations naively on the matrices
    # requires densifying the matrix X, which is memory intensive
    for partyidx in range(len(data.keys())):
        cors_words = []
        print 'Computing correlations for %s' % data.keys()[partyidx]
        for wordidx in range(X.shape[-1]):
            cors = corrcoef(X[:, wordidx].todense().flatten(), partylabels[:, partyidx])[1, 0]
            if abs(cors) > .01:
                cors_words.append((wordidx2word[wordidx], cors))
        allcors[data.keys()[partyidx]] = dict(cors_words)
    fn = folder + '/words_correlations.json'
    json.dump(dict(allcors), open(fn, 'wb'))
Example 5: selective_average_aligned_runs
def selective_average_aligned_runs(self, sources, mixing):
    '''
    Averages one aligned ICA run and calculates a reproducibility index. This version uses the original
    definition in Yang et al.
    '''
    # threshold for inclusion
    thresh = 0.7
    corrsToSum = np.triu(np.abs(corrcoef(sources)), 1).flatten()
    rep = (corrsToSum[np.nonzero(corrsToSum > thresh)].sum()) / (0.5 * self.K * (self.K - 1))
    # only add a component to the average if it has at least one correlation with the other RCs above the threshold;
    # the "> 1" is needed because the diagonal elements are always 1.0, so every row has at least one
    # entry (the self-correlation) that exceeds the threshold
    toInclude = ((np.abs(corrcoef(sources)) > thresh).sum(axis=0) > 1)
    return sources[toInclude, :].mean(axis=0), mixing[:, toInclude].mean(axis=1), rep
Example 6: cal_coff
def cal_coff(array, indicator):
    axis = indicator == 0
    if axis:
        length = array.shape[1]
    else:
        length = array.shape[0]
    for x in xrange(0, length):
        for y in xrange(0, length):
            if x != y:
                if axis:
                    yield sp.corrcoef(array[:, x], array[:, y])
                else:
                    yield sp.corrcoef(array[x, :], array[y, :])
Example 7: pcor
def pcor(X, Y, Z):
    """
    computes the correlation of X and Y conditioning on Z (partial correlation)
    """
    if X.ndim == 1: X = X[:, SP.newaxis]
    if Y.ndim == 1: Y = Y[:, SP.newaxis]
    if Z is None: return STATS.pearsonr(X, Y)
    if Z.ndim == 1: Z = Z[:, SP.newaxis]
    nSamples = X.shape[0]
    betaX, _, _, _ = LA.lstsq(Z, X)
    betaY, _, _, _ = LA.lstsq(Z, Y)
    Xres = X - SP.dot(Z, betaX)
    Yres = Y - SP.dot(Z, betaY)
    corr_cond = SP.corrcoef(Xres[:, 0], Yres[:, 0])[0, 1]
    dz = Z.shape[1]  # dimension of the conditioning variable
    df = max(nSamples - dz - 2, 0)  # degrees of freedom
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        tstat = corr_cond / SP.sqrt(1.0 - corr_cond ** 2)  # calculate the t statistic
        tstat = math.sqrt(df) * tstat
        pv_cond = 2 * t.cdf(-abs(tstat), df, loc=0, scale=1)  # calculate the p value
    return corr_cond, pv_cond
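pcor regresses Z out of both X and Y with least squares and then correlates the residuals (a partial correlation). Below is a small synthetic check, written with plain NumPy rather than the SP/LA aliases the snippet assumes, showing that two variables driven by a common confounder lose their correlation once it is conditioned out.

import numpy as np

rng = np.random.default_rng(0)
Z = rng.normal(size=(500, 1))
X = 2.0 * Z[:, 0] + rng.normal(scale=0.1, size=500)   # X driven by Z
Y = -1.0 * Z[:, 0] + rng.normal(scale=0.1, size=500)  # Y driven by Z

raw_r = np.corrcoef(X, Y)[0, 1]                       # strongly negative
beta_x = np.linalg.lstsq(Z, X[:, None], rcond=None)[0]
beta_y = np.linalg.lstsq(Z, Y[:, None], rcond=None)[0]
x_res = X - (Z @ beta_x)[:, 0]
y_res = Y - (Z @ beta_y)[:, 0]
partial_r = np.corrcoef(x_res, y_res)[0, 1]           # close to zero
print(raw_r, partial_r)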
Example 8: selectTraits
def selectTraits(self, phenoMAF=None, corrMin=None, nUnique=False):
    """
    use only a subset of traits;
    filter out all individuals that have missing values for the selected ones
    """
    self.idx_samples = SP.ones(self.n_s, dtype=bool)
    # filter out samples with nan values
    self.idx_samples[SP.isnan(self.Y[:, self.idx_traits]).any(1)] = False
    # filter out phenotypes that are not diverse enough
    if phenoMAF is not None:
        expr_mean = self.Y[self.idx_samples].mean(0)
        expr_std = self.Y[self.idx_samples].std(0)
        z_scores = SP.absolute(self.Y[self.idx_samples] - expr_mean) / SP.sqrt(expr_std)
        self.idx_traits[(z_scores > 1.5).mean(0) < phenoMAF] = False
    # use only correlated phenotypes
    if corrMin is not None and self.Y.shape[1] > 1:
        corr = SP.corrcoef(self.Y[self.idx_samples].T)
        corr -= SP.eye(corr.shape[0])
        self.idx_traits[SP.absolute(corr).max(0) < corrMin] = False
    # filter out binary (low-cardinality) phenotypes
    if nUnique and self.Y.shape[1] > 1:
        for i in range(self.Y.shape[1]):
            if len(SP.unique(self.Y[self.idx_samples][:, i])) <= nUnique:
                self.idx_traits[i] = False
    LG.debug('number of traits (before filtering): %d' % self.n_t)
    LG.debug('number of traits (after filtering): %d' % self.idx_traits.sum())
    LG.debug('number of samples (before filtering): %d' % self.n_s)
    LG.debug('number of samples (after filtering): %d' % self.idx_samples.sum())
Example 9: calculate_stock_correlation
def calculate_stock_correlation(data):
    """
    This function takes a list containing two lists of the form
    returned by get_yahoo_data (lists of (date, adjusted close) tuples) and
    returns the correlation of the daily returns as defined above.
    """
    apple_returns = []
    google_returns = []
    apple_data = data[0]
    google_data = data[1]
    cm = apple_data[0][1]
    for i in range(1, len(apple_data)):
        cn = apple_data[i][1]
        daily_return = (cn - cm) / cm
        apple_returns.append(daily_return)
        cm = cn
    cm = google_data[0][1]
    for i in range(1, len(google_data)):
        cn = google_data[i][1]
        daily_return = (cn - cm) / cm
        google_returns.append(daily_return)
        cm = cn
    corr_matrix = scipy.corrcoef(google_returns, apple_returns)
    corr_value = corr_matrix[0][1]
    return corr_value
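For reference, the daily-return loops above can be collapsed with np.diff; a compact equivalent on made-up closing prices (not Yahoo data):

import numpy as np

apple_close = np.array([100.0, 101.5, 103.0, 102.0, 104.5])
google_close = np.array([200.0, 198.0, 202.0, 205.0, 204.0])

apple_returns = np.diff(apple_close) / apple_close[:-1]    # (c_n - c_m) / c_m for consecutive days
google_returns = np.diff(google_close) / google_close[:-1]
print(np.corrcoef(google_returns, apple_returns)[0, 1])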
Example 10: get_correlations
def get_correlations(self, pids=None):
    """
    Returns the correlation matrix between traits.
    All traits are used if pids is left empty.
    """
    import bisect
    if not pids:
        pids = sorted(self.phen_dict.keys())
    num_traits = len(pids)
    corr_mat = sp.ones((num_traits, num_traits))
    for i, pid1 in enumerate(pids):
        pd = self.get_avg_value_dict(pid1)
        ets1 = pd['ecotypes']
        pvs1 = pd['values']
        for j, pid2 in enumerate(pids[:i]):
            pd = self.get_avg_value_dict(pid2)
            ets2 = pd['ecotypes']
            pvs2 = pd['values']
            common_ets = set(ets1).intersection(set(ets2))
            ets_ix1 = map(ets1.index, common_ets)
            ets_ix2 = map(ets2.index, common_ets)
            vs1 = [pvs1[et_i] for et_i in ets_ix1]
            vs2 = [pvs2[et_i] for et_i in ets_ix2]
            corr_mat[i, j] = sp.corrcoef(vs1, vs2)[0, 1]
            corr_mat[j, i] = corr_mat[i, j]
    return corr_mat, pids
Example 11: simple_supervised_demo
def simple_supervised_demo():
    print "Simple demo of supervised factor inference"
    model = get_simple_model_object(expr_file='data/expression_sparse.csv')  # simple object using the default simulated dataset; see simple_unsupervised_demo for how it is constructed
    prior = SP.loadtxt("data/prior_sparse.csv", delimiter=",")  # prior for which factor regulates which gene; this matrix has entries between 0 and 1, and the (g,k) entry represents the probability that gene g is affected by factor k
    model.setSparsityPrior(prior)  # prior on which factors affect which genes
    model.update()
    for i in range(prior.shape[1]):
        print "Correlation between factor", i, "prior and weight", SP.corrcoef(model.getW()[:, i], prior[:, i])[0, 1], "sum prior", sum(prior[:, i])
Example 12: summarize_accuracy
def summarize_accuracy(prs_files):
    true_phens = []
    prs_phens = []
    ldpred_phens = []
    tp_prs_rs = []
    tp_ldpred_rs = []
    for prsf in prs_files:
        if os.path.isfile(prsf):
            rt = pd.read_csv(prsf, skipinitialspace=True, index_col=False)
            true_phens.extend(rt['true_phens'])
            prs_phens.extend(rt['raw_effects_prs'])
            ldpred_phens.extend(rt['pval_derived_effects_prs'])
            tp_prs_rs.append(sp.corrcoef(rt['true_phens'], rt['raw_effects_prs'])[0, 1])
            tp_ldpred_rs.append(sp.corrcoef(rt['true_phens'], rt['pval_derived_effects_prs'])[0, 1])
    return (sp.mean(tp_prs_rs), sp.mean(tp_ldpred_rs))
Example 13: correlationMatrix
def correlationMatrix(mdata, linit, lend, nstep):
    lstep = (lend - linit) / nstep
    corr = np.zeros((mdata.shape[0], mdata.shape[0]))
    for length in range(linit, lend, lstep):
        corrs = corrcoef(mdata[:, length:length + lstep])
        corr += corrs
    corr /= nstep
    return corr
Example 14: portfolio_var
def portfolio_var(R, w):
    n = len(w)  # number of assets
    cor = sp.corrcoef(R.T)
    std_dev = sp.std(R, axis=0)
    var = 0.0
    for i in xrange(n):
        for j in xrange(n):
            var += w[i] * w[j] * std_dev[i] * std_dev[j] * cor[i, j]
    return var
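The double loop accumulates w' * Cov * w, with the covariance matrix rebuilt from pairwise correlations and standard deviations. An equivalent vectorized sketch on hypothetical returns (ddof=0 matches the population standard deviation that sp.std uses by default):

import numpy as np

R = np.random.default_rng(1).normal(size=(250, 3))  # hypothetical returns: 250 days, 3 assets
w = np.array([0.5, 0.3, 0.2])

cov = np.cov(R.T, ddof=0)   # cov[i, j] = std_i * std_j * cor[i, j]
print(w @ cov @ w)          # same value the double loop accumulates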
Example 15: pca
def pca(dat, npca=None, verbose=False):
    if isinstance(dat, sp.ndarray):
        dat = pd.DataFrame(dat)
        names = []
        for i in range(dat.shape[1]):
            names.append("x" + str(i + 1))
        dat.columns = names
    names = list(dat.columns)
    nr = dat.shape[0]
    nc = dat.shape[1]
    r = sp.corrcoef(dat, rowvar=False)
    heikin = dat.mean(axis=0)
    bunsan = dat.var(axis=0, ddof=1)
    sd = sp.sqrt(bunsan)
    eval, evec = linalg.eig(r)
    eval = sp.real(eval)
    rank = rankdata(eval, method="ordinal")
    rank = nc + 1 - rank
    eval2 = eval.copy()
    evec2 = evec.copy()
    for i in range(nc):
        j = sp.where(rank == i + 1)[0][0]
        eval[i] = eval2[j]
        evec[:, i] = evec2[:, j]
    contr = eval / nc * 100
    cum_contr = sp.cumsum(contr)
    fl = (sp.sqrt(eval) * evec)
    for i in range(nc):
        dat.iloc[:, i] = (dat.iloc[:, i] - heikin[i]) / sd[i]
    fs = sp.dot(dat, evec * sp.sqrt(nr / (nr - 1)))
    if npca is None:
        npca = sp.sum(eval >= 1)
    eval = eval[0:npca]
    cont = eval / nc
    cumc = sp.cumsum(cont)
    fl = fl[:, 0:npca]
    rcum = sp.sum((fl ** 2), axis=1)
    if verbose:
        print(" ", end="")
        for j in range(npca):
            print("{0:>8s}".format("PC" + str(j + 1)), end="")
        print(" Contribution")
        for i in range(nc):
            print("{0:>12s}".format(names[i]), end="")
            for j in range(npca):
                print(" {0:7.3f}".format(fl[i, j]), end="")
            print(" {0:7.3f}".format(rcum[i]))
        print(" Eigenvalue", end="")
        for j in range(npca):
            print(" {0:7.3f}".format(eval[j]), end="")
        print("\nContribution", end="")
        for j in range(npca):
            print(" {0:7.3f}".format(cont[j]), end="")
        print("\nCum.contrib.", end="")
        for j in range(npca):
            print(" {0:7.3f}".format(cumc[j]), end="")
        print()
    return {"r": r, "fl": fl, "eval": eval, "fs": fs[:, 0:npca]}