This page collects typical usage examples of the Python class sandbox.util.Util.Util. If you are unsure what the Python Util class does, how to use it, or want to see it used in real code, the curated class examples below may help.
The following shows 15 code examples of the Util class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
Example 1: readAuthorsAndDocuments
def readAuthorsAndDocuments(self, useAbstract=True):
    logging.debug("About to read file " + self.dataFilename)
    inFile = open(self.dataFilename)
    authorList = []
    citationList = []
    documentList = []

    lastAbstract = ""
    lastVenue = ""
    lastTitle = ""
    lastAuthors = []
    lastCitationNo = 0

    for i, line in enumerate(inFile):
        Util.printIteration(i, self.stepSize, self.numLines)

        #Match the fields in the file
        emptyLine = line == "\n"
        title = re.findall("#\*(.*)", line)
        currentAuthors = re.findall("#@(.*)", line)
        abstract = re.findall("#!(.*)", line)
        venue = re.findall("#conf(.*)", line)
        citationNo = re.findall("#citation(.*)", line)

        if emptyLine:
            if useAbstract:
                document = lastTitle + " " + lastAbstract
            else:
                document = lastTitle

            documentList.append(document)
            authorList.append(lastAuthors)
            citationList.append(lastCitationNo)

            lastAbstract = ""
            lastTitle = ""
            lastAuthors = []
            lastCitationNo = 0

        if len(title) != 0 and len(title[0]) != 0:
            lastTitle = title[0]

        if len(venue) != 0 and len(venue[0]) != 0:
            lastVenue = venue[0]

        if len(abstract) != 0 and len(abstract[0]) != 0:
            lastAbstract = abstract[0]

        if len(citationNo) != 0 and len(citationNo[0]) != 0:
            lastCitationNo = int(citationNo[0])

        if len(currentAuthors) != 0:
            currentAuthors = currentAuthors[0].split(",")
            currentAuthors = set([x.strip() for x in currentAuthors])
            currentAuthors = currentAuthors.difference(set([""]))
            lastAuthors = currentAuthors

    inFile.close()
    logging.debug("Finished reading " + str(len(documentList)) + " articles")

    return authorList, documentList, citationList
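The parser keys on the record markers matched by the regular expressions above: #* (title), #@ (authors), #! (abstract), #conf (venue) and #citation (citation count), with a blank line closing each record. Below is a rough sketch of that input format and of how the method might be invoked; the sample file contents, the file path and the reader variable are illustrative assumptions, not part of the original source.

#A hypothetical file in the marker format expected by the regular expressions above
sample = (
    "#*A Study of Graph Kernels\n"
    "#@Alice Smith, Bob Jones\n"
    "#conf ICML\n"
    "#citation 12\n"
    "#!We study kernels between labelled graphs.\n"
    "\n"
)

with open("/tmp/sample_abstracts.txt", "w") as f:
    f.write(sample)

#Assuming reader is an instance of the class above with dataFilename set to the sample file:
#authors, documents, citations = reader.readAuthorsAndDocuments(useAbstract=True)
#documents[0] == "A Study of Graph Kernels We study kernels between labelled graphs."
#authors[0] == {"Alice Smith", "Bob Jones"}; citations[0] == 12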
Example 2: evaluate
def evaluate(self, g1, g2, debug=False):
    """
    Find the kernel evaluation between two graphs
    """
    #W1 is always the smallest graph
    if g1.getNumVertices() > g2.getNumVertices():
        return self.evaluate(g2, g1)

    #We ought to have something that makes the matrices the same size
    W1, W2 = self.__getWeightMatrices(g1, g2)
    K1, K2 = self.__getKernelMatrices(g1, g2)

    #Find common eigenspace
    S1, U = numpy.linalg.eigh(self.tau*W1 + (1-self.tau)*K1)
    S2, V = numpy.linalg.eigh(self.tau*W2 + (1-self.tau)*K2)

    #Find approximate diagonals
    SK1 = numpy.diag(Util.mdot(U.T, K1, U))
    SW1 = numpy.diag(Util.mdot(U.T, W1, U))
    SK2 = numpy.diag(Util.mdot(V.T, K2, V))
    SW2 = numpy.diag(Util.mdot(V.T, W2, V))

    evaluation = self.tau * numpy.dot(SW1, SW2) + (1-self.tau)*numpy.dot(SK1, SK2)

    if debug:
        P = numpy.dot(V, U.T)
        f = self.getObjectiveValue(self.tau, P, g1, g2)
        return (evaluation, f, P, SW1, SW2, SK1, SK2)
    else:
        return evaluation
Example 3: __updateEigenSystem
def __updateEigenSystem(self, lmbda, Q, deltaW, W):
    """
    Given the eigenvalues lmbda, eigenvectors Q and a deltaW matrix of weight
    changes, compute a sequence of incidence vectors and update the eigensystem.
    The deltaW is the change in edges from the current weight matrix which
    is given by W.
    """
    changeInds = deltaW.nonzero()

    for s in range(changeInds[0].shape[0]):
        Util.printIteration(s, 10, changeInds[0].shape[0])
        i = changeInds[0][s]
        j = changeInds[1][s]
        if i >= j:  # only consider lower diagonal changes
            continue

        assert deltaW[i, j] != 0
        #if deltaW[i, j] < 0:
        #    logging.warn(" deltaW is usually positive (here deltaW=" + str(deltaW[i, j]) + ")")

        #Note: update W at each iteration here
        lmbda, Q = self.incrementEigenSystem(lmbda, Q, W, i, j, deltaW[i, j])
        W[i, j] += deltaW[i, j]
        W[j, i] += deltaW[i, j]

    return lmbda, Q
Example 4: predictEdges
def predictEdges(self, vertexIndices):
    """
    This makes a prediction for a series of edges using the following score:
    \sum_{z \in n(x) \cap n(y)} 1/\log(|n(z)|)
    Returns a matrix whose rows are ranked lists of vertices, each of length self.windowSize.
    """
    Parameter.checkInt(self.windowSize, 1, self.graph.getNumVertices())
    logging.info("Running predictEdges in " + str(self.__class__.__name__))

    P = numpy.zeros((vertexIndices.shape[0], self.windowSize))
    S = numpy.zeros((vertexIndices.shape[0], self.windowSize))
    W = self.graph.getWeightMatrix()

    for i in range(vertexIndices.shape[0]):
        Util.printIteration(i, self.printStep, vertexIndices.shape[0])
        scores = numpy.zeros(self.graph.getNumVertices())

        for j in range(0, self.graph.getNumVertices()):
            commonNeighbours = numpy.nonzero(W[vertexIndices[i], :] * W[j, :])[0]

            for k in commonNeighbours:
                q = numpy.log(numpy.nonzero(W[k, :])[0].shape[0])
                if q != 0:
                    scores[j] = scores[j] + 1/q

        P[i, :], S[i, :] = self.indicesFromScores(vertexIndices[i], scores)

    return P, S
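The inner loops compute an Adamic/Adar-style score from the weight matrix: each common neighbour z of the pair contributes 1/log(|n(z)|). A self-contained numpy sketch of the same score on a toy adjacency matrix, independent of the graph classes used above:

import numpy

#4-vertex undirected graph with edges 0-1, 0-2, 1-2, 2-3
W = numpy.array([[0, 1, 1, 0],
                 [1, 0, 1, 0],
                 [1, 1, 0, 1],
                 [0, 0, 1, 0]], numpy.float64)

def adamicAdarScore(W, x, y):
    #Sum 1/log(deg(z)) over the common neighbours z of x and y
    commonNeighbours = numpy.nonzero(W[x, :] * W[y, :])[0]
    score = 0.0
    for z in commonNeighbours:
        q = numpy.log(numpy.count_nonzero(W[z, :]))
        if q != 0:
            score += 1.0/q
    return score

print(adamicAdarScore(W, 0, 1))    #vertex 2 is the only common neighbour (degree 3), so 1/log(3)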
Example 5: cleanXML
def cleanXML(self):
    """
    Take the original XML file and clean up HTML characters and & symbols. We
    also create a list of possible matches for the experts.
    """
    if not os.path.exists(self.xmlCleanFilename):
        logging.debug("Cleaning XML")
        h = HTMLParser.HTMLParser()

        inFile = open(self.xmlFileName)
        outFile = open(self.xmlCleanFilename, "w")
        i = 0

        for line in inFile:
            Util.printIteration(i, self.stepSize, self.numLines)
            #Unescape HTML entities, then re-escape bare ampersands so the XML stays valid
            outLine = h.unescape(line).replace("&", "&amp;")
            outLine = re.sub("<title>.*[\<\>].*</title>", "<title>Default Title</title>", outLine)
            outLine = re.sub("<ee>.*[\<\>].*</ee>", "<ee>Default text</ee>", outLine)
            outFile.write(outLine)
            i += 1

        inFile.close()
        outFile.close()
        logging.debug("All done")
    else:
        logging.debug("File already generated: " + self.xmlCleanFilename)
Example 6: simulateModel
def simulateModel(theta):
    """
    The parameter t is the particle index.
    """
    logging.debug("theta=" + str(theta))

    #We start with the observed graph at the start date
    graph = targetGraph.subgraph(targetGraph.removedIndsAt(startDate))
    graph.addVertices(M-graph.size)

    p = Util.powerLawProbs(alpha, zeroVal)
    hiddenDegSeq = Util.randomChoice(p, graph.getNumVertices())

    featureInds = numpy.ones(graph.vlist.getNumFeatures(), numpy.bool)
    featureInds[HIVVertices.dobIndex] = False
    featureInds[HIVVertices.infectionTimeIndex] = False
    featureInds[HIVVertices.hiddenDegreeIndex] = False
    featureInds[HIVVertices.stateIndex] = False
    featureInds = numpy.arange(featureInds.shape[0])[featureInds]

    matcher = GraphMatch(matchAlg, alpha=matchAlpha, featureInds=featureInds, useWeightM=False)
    graphMetrics = HIVGraphMetrics2(targetGraph, breakSize, matcher, float(endDate))

    recordStep = (endDate-startDate)/float(numRecordSteps)

    rates = HIVRates(graph, hiddenDegSeq)
    model = HIVEpidemicModel(graph, rates, T=float(endDate), T0=float(startDate), metrics=graphMetrics)
    model.setRecordStep(recordStep)
    model.setParams(theta)

    model.simulate()

    objective = model.objective()
    return objective
Example 7: predict
def predict(self, X):
    """
    Make a prediction for a set of examples given as the rows of the matrix X.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`
    """
    Util.abstract()
Example 8: eigenAdd
def eigenAdd(omega, Q, Y, k):
    """
    Perform an eigen update of the form A*A + Y*Y in which Y is a low-rank matrix
    and A^*A = Q Omega Q*. We use the rank-k approximation of A: Q_k Omega_k Q_k^*
    and then approximate [A^*A_k + Y^*Y]_k.
    """
    #logging.debug("< eigenAdd >")
    Parameter.checkInt(k, 0, omega.shape[0])
    #if not numpy.isrealobj(omega) or not numpy.isrealobj(Q):
    #    raise ValueError("Eigenvalues and eigenvectors must be real")
    if omega.ndim != 1:
        raise ValueError("omega must be 1-d array")
    if omega.shape[0] != Q.shape[1]:
        raise ValueError("Must have same number of eigenvalues and eigenvectors")

    if __debug__:
        Parameter.checkOrthogonal(Q, tol=EigenUpdater.tol, softCheck=True, arrayInfo="input Q in eigenAdd()")

    #Taking the abs of the eigenvalues is correct
    inds = numpy.flipud(numpy.argsort(numpy.abs(omega)))

    omega, Q = Util.indEig(omega, Q, inds[numpy.abs(omega)>EigenUpdater.tol])
    Omega = numpy.diag(omega)

    YY = Y.conj().T.dot(Y)
    QQ = Q.dot(Q.conj().T)
    Ybar = Y - Y.dot(QQ)

    Pbar, sigmaBar, Qbar = numpy.linalg.svd(Ybar, full_matrices=False)
    inds = numpy.flipud(numpy.argsort(numpy.abs(sigmaBar)))
    inds = inds[numpy.abs(sigmaBar)>EigenUpdater.tol]
    Pbar, sigmaBar, Qbar = Util.indSvd(Pbar, sigmaBar, Qbar, inds)
    SigmaBar = numpy.diag(sigmaBar)
    Qbar = Ybar.T.dot(Pbar)
    Qbar = Qbar.dot(numpy.diag(numpy.diag(Qbar.T.dot(Qbar))**-0.5))

    r = sigmaBar.shape[0]

    YQ = Y.dot(Q)
    Zeros = numpy.zeros((r, omega.shape[0]))
    D = numpy.c_[Q, Qbar]

    YYQQ = YY.dot(QQ)
    Z = D.conj().T.dot(YYQQ + YYQQ.conj().T).dot(D)
    F = numpy.c_[numpy.r_[Omega - YQ.conj().T.dot(YQ), Zeros], numpy.r_[Zeros.T, SigmaBar.conj().dot(SigmaBar)]]
    F = F + Z

    pi, H = scipy.linalg.eigh(F)

    inds = numpy.flipud(numpy.argsort(numpy.abs(pi)))
    H = H[:, inds[0:k]]
    pi = pi[inds[0:k]]

    V = D.dot(H)
    #logging.debug("</ eigenAdd >")
    return pi, V
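For small dense matrices the update can be sanity-checked against a direct eigendecomposition: with k equal to the number of columns, pi and V should approximate the eigensystem of A^T A + Y^T Y. A rough sketch of such a check; the EigenUpdater import path is an assumption based on the package layout used above:

import numpy
#from sandbox.util.EigenUpdater import EigenUpdater    #assumed location of eigenAdd

numpy.random.seed(21)
A = numpy.random.rand(10, 6)
Y = numpy.random.rand(2, 6)    #low-rank update

omega, Q = numpy.linalg.eigh(A.T.dot(A))

#pi, V = EigenUpdater.eigenAdd(omega, Q, Y, k=6)
#V.dot(numpy.diag(pi)).dot(V.T) should then be close to A.T.dot(A) + Y.T.dot(Y)
print(numpy.linalg.eigvalsh(A.T.dot(A) + Y.T.dot(Y)))    #eigenvalues to compare pi against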
Example 9: evaluateLearn
def evaluateLearn(X, y, idx, learnModel, predict, metricMethod, progress=True):
    """
    Evaluate this learning algorithm using the given list of training/test splits.
    The metricMethod is a method which takes (predictedY, realY) as input
    and returns a metric about the quality of the evaluation.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param y: A vector of labels
    :type y: :class:`ndarray`

    :param idx: A list of training/test splits
    :type idx: :class:`list`

    :param learnModel: A function such that learnModel(X, y) finds a mapping from X to y
    :type learnModel: :class:`function`

    :param predict: A function such that predict(X) makes predictions for X
    :type predict: :class:`function`

    :param metricMethod: A function such that metricMethod(predY, testY) returns the quality of predicted labels predY
    :type metricMethod: :class:`function`

    Output: an array containing the metric value for each fold.
    """
    #Parameter.checkClass(idx, list)
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkArray(X, softCheck=True)
    Parameter.checkInt(X.shape[0], 1, float('inf'))
    Parameter.checkClass(y, numpy.ndarray)
    Parameter.checkArray(y, softCheck=True)

    if y.ndim != 1:
        raise ValueError("Dimension of y must be 1")

    i = 0
    metrics = numpy.zeros(len(idx))
    logging.debug("EvaluateLearn: Using " + str(len(idx)) + " splits on " + str(X.shape[0]) + " examples")

    for idxtr, idxts in idx:
        if progress:
            Util.printConciseIteration(i, 1, len(idx))

        trainX, testX = X[idxtr, :], X[idxts, :]
        trainY, testY = y[idxtr], y[idxts]
        #logging.debug("Distribution of labels in evaluateLearn train: " + str(numpy.bincount(trainY)))
        #logging.debug("Distribution of labels in evaluateLearn test: " + str(numpy.bincount(testY)))

        learnModel(trainX, trainY)
        predY = predict(testX)
        gc.collect()

        metrics[i] = metricMethod(predY, testY)
        i += 1

    return metrics
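A rough usage sketch with plain closures standing in for a learner; the Sampling import path and the nearest-class-mean "model" are illustrative assumptions, not part of the original code:

import numpy
#from sandbox.util.Sampling import Sampling    #assumed location of evaluateLearn

numpy.random.seed(21)
y = numpy.array([0, 1] * 20)
X = numpy.random.rand(40, 3) + y[:, numpy.newaxis]    #class 1 rows are shifted

#Two hand-made train/test splits in the expected idx format (pairs of index arrays)
idx = [(numpy.arange(0, 30), numpy.arange(30, 40)),
       (numpy.arange(10, 40), numpy.arange(0, 10))]

state = {}

def learnModel(trainX, trainY):
    #"Learn" the mean of each class
    state["means"] = [trainX[trainY == c].mean(0) for c in (0, 1)]

def predict(testX):
    dists = numpy.array([numpy.linalg.norm(testX - m, axis=1) for m in state["means"]])
    return dists.argmin(0)

def metricMethod(predY, testY):
    return numpy.mean(predY == testY)

#metrics = Sampling.evaluateLearn(X, y, idx, learnModel, predict, metricMethod)
#print(metrics.mean(), metrics.std())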
Example 10: testExpandIntArray
def testExpandIntArray(self):
    v = numpy.array([1, 3, 2, 4], numpy.int)
    w = Util.expandIntArray(v)

    self.assertTrue((w == numpy.array([0, 1, 1, 1, 2, 2, 3, 3, 3, 3], numpy.int)).all())

    v = numpy.array([], numpy.int)
    w = Util.expandIntArray(v)
    self.assertTrue((w == numpy.array([], numpy.int)).all())
Example 11: addRows
def addRows(U, s, V, B, k=None):
    """
    Find the SVD of a matrix [A ; B] where A = U diag(s) V.T. Uses the QR
    decomposition to find an orthogonal basis on B.

    :param U: The left singular vectors of A
    :param s: The singular values of A
    :param V: The right singular vectors of A
    :param B: The matrix to append to A
    """
    if V.shape[0] != B.shape[1]:
        raise ValueError("V must have same number of rows as B has cols")
    if s.shape[0] != U.shape[1]:
        raise ValueError("Number of cols of U must be the same size as s")
    if s.shape[0] != V.shape[1]:
        raise ValueError("Number of cols of V must be the same size as s")

    if k == None:
        k = U.shape[1]

    m, p = U.shape
    r = B.shape[0]

    C = B.T - V.dot(V.T).dot(B.T)
    Q, R = numpy.linalg.qr(C)

    rPrime = Util.rank(C)
    Q = Q[:, 0:rPrime]
    R = R[0:rPrime, :]

    D = numpy.c_[numpy.diag(s), numpy.zeros((p, rPrime))]
    E = numpy.c_[B.dot(V), R.T]
    D = numpy.r_[D, E]

    G1 = numpy.c_[U, numpy.zeros((m, r))]
    G2 = numpy.c_[numpy.zeros((r, p)), numpy.eye(r)]
    G = numpy.r_[G1, G2]

    H = numpy.c_[V, Q]

    nptst.assert_array_almost_equal(G.T.dot(G), numpy.eye(G.shape[1]))
    nptst.assert_array_almost_equal(H.T.dot(H), numpy.eye(H.shape[1]))
    nptst.assert_array_almost_equal(G.dot(D).dot(H.T), numpy.r_[(U*s).dot(V.T), B])

    Uhat, sHat, Vhat = numpy.linalg.svd(D, full_matrices=False)
    inds = numpy.flipud(numpy.argsort(sHat))[0:k]
    Uhat, sHat, Vhat = Util.indSvd(Uhat, sHat, Vhat, inds)

    #The best rank k approximation of [A ; B]
    Utilde = G.dot(Uhat)
    Stilde = sHat
    Vtilde = H.dot(Vhat)

    return Utilde, Stilde, Vtilde
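The update can be checked against a direct SVD of the stacked matrix: with k equal to the full rank, (Utilde * Stilde).dot(Vtilde.T) should rebuild [A ; B]. A small numpy sketch of that check, with the addRows call left as a commented, assumed import from the class shown above:

import numpy

numpy.random.seed(21)
A = numpy.random.rand(8, 5)
B = numpy.random.rand(3, 5)

U, s, VT = numpy.linalg.svd(A, full_matrices=False)
V = VT.T

#Utilde, sTilde, Vtilde = SVDUpdate.addRows(U, s, V, B)    #assumed class name
#reconstructed = (Utilde * sTilde).dot(Vtilde.T)           #should be close to numpy.r_[A, B]

print(numpy.linalg.svd(numpy.r_[A, B], compute_uv=False))  #singular values sTilde should match these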
Example 12: evaluateCvOuter
def evaluateCvOuter(self, X, Y, folds):
    """
    Run cross validation and output some ROC curves. In this case Y is a 1D array.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param Y: A vector of labels
    :type Y: :class:`ndarray`

    :param folds: The number of cross validation folds
    :type folds: :class:`int`
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(Y, numpy.ndarray)
    Parameter.checkInt(folds, 2, float('inf'))
    if Y.ndim != 1:
        raise ValueError("Expecting Y to be 1D")

    indexList = cross_val.StratifiedKFold(Y, folds)

    bestParams = []
    bestTrainAUCs = numpy.zeros(folds)
    bestTrainROCs = []
    bestTestAUCs = numpy.zeros(folds)
    bestTestROCs = []
    bestMetaDicts = []
    i = 0

    for trainInds, testInds in indexList:
        Util.printIteration(i, 1, folds, "Outer CV: ")
        trainX, trainY = X[trainInds, :], Y[trainInds]
        testX, testY = X[testInds, :], Y[testInds]

        self.learnModel(trainX, trainY)
        #self.learnModelCut(trainX, trainY)

        predTrainY = self.predict(trainX)
        predTestY = self.predict(testX)

        bestTrainAUCs[i] = Evaluator.auc(predTrainY, trainY)
        bestTestAUCs[i] = Evaluator.auc(predTestY, testY)

        #Store the parameters and ROC curves
        bestTrainROCs.append(Evaluator.roc(trainY, predTrainY))
        bestTestROCs.append(Evaluator.roc(testY, predTestY))

        metaDict = {}
        bestMetaDicts.append(metaDict)

        i += 1

    logging.debug("Mean test AUC = " + str(numpy.mean(bestTestAUCs)))
    logging.debug("Std test AUC = " + str(numpy.std(bestTestAUCs)))

    allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]

    return (bestParams, allMetrics, bestMetaDicts)
Example 13: eigpsd
def eigpsd(X, n):
    """
    Find the eigenvalues and eigenvectors of a positive semi-definite symmetric matrix.
    The input matrix X can be a numpy array or a scipy sparse matrix. In the case that
    n==X.shape[0] we convert to an ndarray.

    :param X: The matrix to find the eigenvalues of.
    :type X: :class:`ndarray`

    :param n: If n is an int, then it is the number of columns to sample, otherwise n is an array of column indices.

    :return lmbda: The set of eigenvalues
    :return V: The matrix of eigenvectors as an ndarray
    """
    if type(n) == int:
        n = min(n, X.shape[0])
        inds = numpy.sort(numpy.random.permutation(X.shape[0])[0:n])
    elif type(n) == numpy.ndarray:
        inds = numpy.sort(n)
    else:
        raise ValueError("Invalid n value: " + str(n))

    invInds = numpy.setdiff1d(numpy.arange(X.shape[0]), inds)

    if inds.shape[0] == X.shape[0] and (inds == numpy.arange(X.shape[0])).all():
        if scipy.sparse.issparse(X):
            X = numpy.array(X.todense())
        lmbda, V = Util.safeEigh(X)
        return lmbda, V

    tmp = X[inds, :]
    A = tmp[:, inds]
    B = tmp[:, invInds]

    if scipy.sparse.issparse(X):
        A = numpy.array(A.todense())
        BB = numpy.array((B.dot(B.T)).todense())
    else:
        BB = B.dot(B.T)

    #Following line is very slow
    #Am12 = scipy.linalg.sqrtm(numpy.linalg.pinv(A))
    Am12 = Util.matrixPowerh(A, -0.5)

    S = A + Am12.dot(BB).dot(Am12)
    S = (S.T + S)/2

    lmbda, U = Util.safeEigh(S)

    tol = 10**-10
    lmbdaN = lmbda.copy()
    lmbdaN[numpy.abs(lmbda) < tol] = 0
    lmbdaN[numpy.abs(lmbda) > tol] = lmbdaN[numpy.abs(lmbda) > tol]**-0.5

    V = X[:, inds].dot(Am12.dot(U)*lmbdaN)

    return lmbda, V
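This is a Nyström-style approximation: a subset of columns gives the A and B blocks, and approximate eigenvectors of the full matrix are recovered from them. When every column is sampled the result should agree with a dense eigendecomposition, which gives a quick check on a small PSD matrix; the Nystrom import path below is an assumption:

import numpy
#from sandbox.util.Nystrom import Nystrom    #assumed location of eigpsd

numpy.random.seed(21)
Z = numpy.random.rand(6, 6)
X = Z.dot(Z.T)    #positive semi-definite symmetric matrix

#lmbda, V = Nystrom.eigpsd(X, 6)    #n == X.shape[0], so this takes the exact branch
print(numpy.linalg.eigh(X)[0])      #eigenvalues lmbda should match these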
Example 14: testEntropy
def testEntropy(self):
    v = numpy.array([0, 0, 0, 1, 1, 1])
    self.assertEquals(Util.entropy(v), 1)

    v = numpy.array([0, 0, 0])
    self.assertEquals(Util.entropy(v), 0)

    v = numpy.array([1, 1, 1])
    self.assertEquals(Util.entropy(v), 0)
Example 15: supervisedMC23
def supervisedMC23(lists, itemList, topQList, verbose=False):
    """
    A supervised version of MC2 of our own invention. The idea is to find a
    linear combination of transition matrices to fit a given one. We just make
    sure it fits the stationary distribution.
    """
    import cvxopt
    import cvxopt.solvers
    ell = len(lists)
    n = len(itemList)
    outputList, scores, PList = RankAggregator.MC2(lists, itemList, verbose=True)

    Py = RankAggregator.generateTransitionMatrix(topQList, itemList)
    u, v = scipy.sparse.linalg.eigs(Py.T, 1)
    v = numpy.array(v).flatten()

    c = numpy.zeros(v.shape[0])

    #Each column of Q holds one vectorised transition matrix
    Q = cvxopt.matrix(numpy.zeros((n*n, ell)))
    for i, P in enumerate(PList):
        Q[:, i] = cvxopt.matrix(numpy.array(P.todense()).ravel())

    c = cvxopt.matrix(c)
    QQ = Q.T * Q

    Py = RankAggregator.generateTransitionMatrix(topQList, itemList)
    s = numpy.array(Py.todense()).ravel()
    s = cvxopt.matrix(s)

    G = cvxopt.spdiag((-numpy.ones(ell)).tolist())
    h = cvxopt.matrix(numpy.zeros(ell))
    A = cvxopt.matrix(numpy.ones(ell), (1, ell))
    b = cvxopt.matrix(numpy.ones(1))

    q = -Q.T * s

    sol = cvxopt.solvers.qp(QQ, q, G, h, A, b)
    alpha = numpy.array(sol['x'])

    #Combine the matrices
    P = numpy.zeros((n, n))
    for j, Pj in enumerate(PList):
        Util.printIteration(j, 1, ell)
        P += alpha[j] * numpy.array(Pj.todense())

    P /= ell

    outputList, scores = RankAggregator.computeOutputList(P, itemList)

    if verbose:
        return outputList, scores, PList
    else:
        return outputList, scores
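The cvxopt call solves a least-squares fit of the stacked, vectorised transition matrices to the target: minimise alpha^T Q^T Q alpha - 2 s^T Q alpha (i.e. fit Q*alpha ≈ s) subject to alpha >= 0 and sum(alpha) = 1. A minimal self-contained sketch of the same QP setup, with random data standing in for the transition matrices:

import numpy
import cvxopt
import cvxopt.solvers

cvxopt.solvers.options['show_progress'] = False
numpy.random.seed(21)
ell, n = 3, 4

#Columns of Q are vectorised n x n matrices; s is the vectorised target matrix
Q = cvxopt.matrix(numpy.random.rand(n*n, ell))
s = cvxopt.matrix(numpy.random.rand(n*n))

#cvxopt minimises (1/2) x^T P x + q^T x, which has the same minimiser as ||Q*x - s||^2
P = Q.T * Q
q = -Q.T * s
G = cvxopt.spdiag((-numpy.ones(ell)).tolist())    #-x <= 0, i.e. x >= 0
h = cvxopt.matrix(numpy.zeros(ell))
A = cvxopt.matrix(numpy.ones(ell), (1, ell))      #sum(x) = 1
b = cvxopt.matrix(numpy.ones(1))

sol = cvxopt.solvers.qp(P, q, G, h, A, b)
alpha = numpy.array(sol['x']).flatten()
print(alpha, alpha.sum())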