本文整理汇总了Python中sandbox.util.Util.Util.printIteration方法的典型用法代码示例。如果您正苦于以下问题:Python Util.printIteration方法的具体用法?Python Util.printIteration怎么用?Python Util.printIteration使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sandbox.util.Util.Util的用法示例。
在下文中一共展示了Util.printIteration方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: cleanXML
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
def cleanXML(self):
    """
    Write a cleaned copy of the original XML file to self.xmlCleanFilename.

    Every line of self.xmlFileName has its HTML entities decoded, its
    ampersands re-escaped as ``&amp;`` and any <title> or <ee> element whose
    text contains an angle bracket replaced by a fixed placeholder. Nothing
    is written when the cleaned file already exists.
    """
    if os.path.exists(self.xmlCleanFilename):
        logging.debug("File already generated: " + self.xmlCleanFilename)
        return

    logging.debug("Cleaning XML")
    h = HTMLParser.HTMLParser()
    # Compiled once rather than on every line of the input
    titlePattern = re.compile(r"<title>.*[\<\>].*</title>")
    eePattern = re.compile(r"<ee>.*[\<\>].*</ee>")

    # The with blocks close both files even if a line fails to process
    with open(self.xmlFileName) as inFile:
        with open(self.xmlCleanFilename, "w") as outFile:
            for i, line in enumerate(inFile):
                Util.printIteration(i, self.stepSize, self.numLines)
                # NOTE(review): the replacement text previously read "&", which
                # made this call a no-op; "&amp;" is assumed to be the intended
                # value since the decoded text is written back out as XML.
                outLine = h.unescape(line).replace("&", "&amp;")
                outLine = titlePattern.sub("<title>Default Title</title>", outLine)
                outLine = eePattern.sub("<ee>Default text</ee>", outLine)
                outFile.write(outLine)

    logging.debug("All done")
示例2: __updateEigenSystem
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
def __updateEigenSystem(self, lmbda, Q, deltaW, W):
    """
    Update the eigensystem (lmbda, Q) for the weight changes held in deltaW.

    The nonzero entries of deltaW are visited in the order given by
    deltaW.nonzero(). Each entry (i, j) with i < j triggers one call to
    self.incrementEigenSystem, after which the same change is added to both
    W[i, j] and W[j, i]. W is therefore modified in place, and every
    incremental update sees the weights left by the previous ones.

    Returns the updated pair (lmbda, Q).
    """
    changeInds = deltaW.nonzero()
    numChanges = changeInds[0].shape[0]

    for s in range(numChanges):
        Util.printIteration(s, 10, numChanges)
        i, j = changeInds[0][s], changeInds[1][s]

        # Entries with i >= j are ignored, so a symmetric pair is handled once
        if i < j:
            assert deltaW[i, j] != 0
            lmbda, Q = self.incrementEigenSystem(lmbda, Q, W, i, j, deltaW[i,j])
            # W has to reflect this change before the next increment
            W[i, j] += deltaW[i, j]
            W[j, i] += deltaW[i, j]

    return lmbda, Q
示例3: predictEdges
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
def predictEdges(self, vertexIndices):
    """
    Score candidate edges for every vertex listed in vertexIndices.

    The score between a query vertex x and a vertex y is the sum, over all
    vertices z that have a nonzero weight to both x and y, of 1/log(d(z)),
    where d(z) is the number of nonzero entries in row z of the weight
    matrix. Terms whose logarithm is zero are left out.

    Returns (P, S), filled row by row from self.indicesFromScores; both have
    one row per query vertex and self.windowSize columns.
    """
    Parameter.checkInt(self.windowSize, 1, self.graph.getNumVertices())
    logging.info("Running predictEdges in " + str(self.__class__.__name__))

    numQueries = vertexIndices.shape[0]
    P = numpy.zeros((numQueries, self.windowSize))
    S = numpy.zeros((numQueries, self.windowSize))
    W = self.graph.getWeightMatrix()

    for i in range(numQueries):
        Util.printIteration(i, self.printStep, numQueries)
        scores = numpy.zeros(self.graph.getNumVertices())

        for j in range(self.graph.getNumVertices()):
            sharedNeighbours = numpy.nonzero(W[vertexIndices[i], :] * W[j, :])[0]

            for k in sharedNeighbours:
                logDegree = numpy.log(numpy.nonzero(W[k, :])[0].shape[0])
                if logDegree == 0:
                    # A zero logarithm would mean dividing by zero
                    continue
                scores[j] += 1/logDegree

        P[i, :], S[i, :] = self.indicesFromScores(vertexIndices[i], scores)

    return P, S
示例4: readAuthorsAndDocuments
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
def readAuthorsAndDocuments(self, useAbstract=True):
    """
    Parse the data file into parallel lists of authors, documents and
    citation counts.

    Field lines are recognised by the markers ``#*`` (title), ``#@``
    (comma-separated authors), ``#!`` (abstract) and ``#citation`` (citation
    count). A line consisting only of a newline ends the current record: the
    record is appended to the output lists and the per-record state is reset.
    A trailing record that is not followed by such a line is not emitted.

    :param useAbstract: If True a document is the title, a space and the abstract; otherwise it is the title alone.
    :type useAbstract: :class:`bool`

    :return: The tuple (authorList, documentList, citationList). An entry of authorList is a set of stripped, non-empty names, or an empty list when the record had no author line.
    """
    logging.debug("About to read file " + self.dataFilename)

    # Compiled once instead of on every line
    titlePattern = re.compile(r"#\*(.*)")
    authorsPattern = re.compile("#@(.*)")
    abstractPattern = re.compile("#!(.*)")
    citationPattern = re.compile("#citation(.*)")

    authorList = []
    citationList = []
    documentList = []

    lastAbstract = ""
    lastTitle = ""
    lastAuthors = []
    lastCitationNo = 0

    # The with block closes the file even if a line fails to parse
    with open(self.dataFilename) as inFile:
        for i, line in enumerate(inFile):
            Util.printIteration(i, self.stepSize, self.numLines)

            if line == "\n":
                if useAbstract:
                    document = lastTitle + " " + lastAbstract
                else:
                    document = lastTitle

                documentList.append(document)
                authorList.append(lastAuthors)
                citationList.append(lastCitationNo)

                lastAbstract = ""
                lastTitle = ""
                lastAuthors = []
                lastCitationNo = 0
                # A bare newline cannot match any of the field markers
                continue

            title = titlePattern.findall(line)
            if title and title[0]:
                lastTitle = title[0]

            abstract = abstractPattern.findall(line)
            if abstract and abstract[0]:
                lastAbstract = abstract[0]

            citationNo = citationPattern.findall(line)
            if citationNo and citationNo[0]:
                lastCitationNo = int(citationNo[0])

            currentAuthors = authorsPattern.findall(line)
            if currentAuthors:
                names = set(x.strip() for x in currentAuthors[0].split(","))
                names.discard("")
                lastAuthors = names

    logging.debug("Finished reading " + str(len(documentList)) + " articles")
    return authorList, documentList, citationList
示例5: evaluateCvOuter
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
def evaluateCvOuter(self, X, Y, folds):
    """
    Run stratified cross validation, training and scoring this learner on
    each fold, and collect AUC values and ROC curves.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param Y: A 1D vector of labels
    :type Y: :class:`ndarray`

    :param folds: The number of cross validation folds
    :type folds: :class:`int`

    :return: The tuple (bestParams, allMetrics, bestMetaDicts). allMetrics is [train AUCs, train ROCs, test AUCs, test ROCs]; bestParams is returned empty and bestMetaDicts holds one empty dict per fold.
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(Y, numpy.ndarray)
    Parameter.checkInt(folds, 2, float('inf'))
    if Y.ndim != 1:
        raise ValueError("Expecting Y to be 1D")

    indexList = cross_val.StratifiedKFold(Y, folds)

    bestParams = []
    bestMetaDicts = []
    bestTrainROCs, bestTestROCs = [], []
    bestTrainAUCs = numpy.zeros(folds)
    bestTestAUCs = numpy.zeros(folds)

    for fold, (trainInds, testInds) in enumerate(indexList):
        Util.printIteration(fold, 1, folds, "Outer CV: ")
        trainX, trainY = X[trainInds, :], Y[trainInds]
        testX, testY = X[testInds, :], Y[testInds]

        self.learnModel(trainX, trainY)
        predTrainY = self.predict(trainX)
        predTestY = self.predict(testX)

        bestTrainAUCs[fold] = Evaluator.auc(predTrainY, trainY)
        bestTestAUCs[fold] = Evaluator.auc(predTestY, testY)

        # Keep the ROC curves and a (currently empty) metadata dict per fold
        bestTrainROCs.append(Evaluator.roc(trainY, predTrainY))
        bestTestROCs.append(Evaluator.roc(testY, predTestY))
        bestMetaDicts.append({})

    logging.debug("Mean test AUC = " + str(numpy.mean(bestTestAUCs)))
    logging.debug("Std test AUC = " + str(numpy.std(bestTestAUCs)))

    allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]
    return (bestParams, allMetrics, bestMetaDicts)
示例6: supervisedMC23
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
def supervisedMC23(lists, itemList, topQList, verbose=False):
    """
    A supervised version of MC2: find a non-negative combination of the
    per-list transition matrices, with weights summing to one, that best
    fits the transition matrix generated from topQList in the least squares
    sense, then rank the items using the combined matrix.

    NOTE(review): the previous body computed the leading eigenvector of the
    target transition matrix (its stationary distribution) but never used
    it, and it assigned into a matrix Q that was never created, so every
    call raised NameError. Q is now built as in supervisedMC22 and the
    unused eigenvector computation is removed; fitting only the stationary
    distribution remains to be implemented.

    :param lists: A list of lists. Each sublist is an ordered set of a subset of the items from itemList

    :param itemList: A list of all possible items

    :param topQList: The list used to generate the target transition matrix

    :param verbose: If True the per-list transition matrices are returned as well

    :return: (outputList, scores), followed by PList when verbose is True
    """
    import cvxopt
    import cvxopt.solvers

    ell = len(lists)
    n = len(itemList)
    outputList, scores, PList = RankAggregator.MC2(lists, itemList, verbose=True)

    # One column per input list, holding that list's flattened transition matrix
    Q = cvxopt.spmatrix([], [], [], (n*n, ell))
    for i, P in enumerate(PList):
        Q[:, i] = cvxopt.matrix(numpy.array(P.todense()).ravel())
    QQ = Q.T * Q

    Py = RankAggregator.generateTransitionMatrix(topQList, itemList)
    s = cvxopt.matrix(numpy.array(Py.todense()).ravel())

    # G, h encode -alpha <= 0 and A, b encode sum(alpha) = 1
    G = cvxopt.spdiag((-numpy.ones(ell)).tolist())
    h = cvxopt.matrix(numpy.zeros(ell))
    A = cvxopt.matrix(numpy.ones(ell), (1, ell))
    b = cvxopt.matrix(numpy.ones(1))
    q = -Q.T * s

    sol = cvxopt.solvers.qp(QQ, q, G, h, A, b)
    alpha = numpy.array(sol['x'])

    #Combine the matrices
    P = numpy.zeros((n, n))
    for j, Pj in enumerate(PList):
        Util.printIteration(j, 1, ell)
        P += alpha[j] * numpy.array(Pj.todense())
    P /= ell

    outputList, scores = RankAggregator.computeOutputList(P, itemList)

    if verbose:
        return outputList, scores, PList
    return outputList, scores
示例7: learnModel
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
def learnModel(self, graph):
    """
    Learn a prediction model based on considering ego networks as independent.
    For each ego with at least one neighbour, X contains the features of its
    neighbours and y the values of the corresponding edges. The alter
    regressor is trained on (X, y) to give a weight vector w for that ego,
    and the ego regressor is then trained to map ego features onto these
    weight vectors.

    :param graph: The input graph to learn from.
    :type graph: class:`apgl.graph.AbstractSingleGraph`

    :return: The matrix W whose rows are the weight vectors returned by the alter regressor, one per ego that has neighbours.
    """
    logging.info("Learning model on graph of size " + str(graph.getNumVertices()))
    logging.info("EgoLearner: " + str(self.egoRegressor))
    logging.info("AlterLearner: " + str(self.alterRegressor))

    numVertices = graph.getNumVertices()
    numFeatures = graph.getVertexList().getNumFeatures()
    allIndices = numpy.arange(0, numVertices)
    V = graph.getVertexList().getVertices(list(allIndices))
    printStep = numpy.floor(numVertices/10)

    # Rows are gathered in lists and stacked once after the loop; growing the
    # arrays with numpy.r_ on every iteration copied them each time.
    weightRows = []
    egoRows = []

    for i in range(numVertices):
        Util.printIteration(i, printStep, numVertices)
        neighbours = graph.neighbours(i)

        if neighbours.shape[0] == 0:
            continue

        X = V[neighbours, :]
        y = numpy.ones(X.shape[0])
        for j in range(neighbours.shape[0]):
            y[j] = graph.getEdge(i, neighbours[j])

        weightRows.append(self.alterRegressor.learnModel(X, y))
        egoRows.append(V[i, :])

    # Starting from the empty float arrays keeps the shape and dtype that
    # repeated appending produced, including when no ego has neighbours
    W = numpy.zeros((0, numFeatures))
    Xe = numpy.zeros((0, numFeatures))
    if weightRows:
        W = numpy.r_[W, numpy.array(weightRows)]
        Xe = numpy.r_[Xe, numpy.array(egoRows)]

    #Now we need to solve least to find regressor of Xe onto W
    logging.info("Finding regression matrix onto weights using matrix of size " + str(Xe.shape))
    gc.collect()
    self.egoRegressor.learnModel(Xe, W)

    return W
示例8: modelSelect
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
def modelSelect(self, X):
    """
    Perform model selection on X and store the best parameters.

    The nonzero entries of X are split into cross validation folds. For every
    fold and every k in self.ks a copy of this learner is evaluated by
    localAucsLmbdas in a process pool, which yields one score per value in
    self.lmbdas. The (k, lmbda) pair with the largest mean score over the
    folds is written to self.k and self.lmbda.

    :param X: The matrix to select on; its nnz attribute gives the number of entries to split.

    :return: (meanLocalAucs, stdLocalAucs), the mean and standard deviation over folds, indexed by (k, lmbda).
    """
    cvInds = Sampling.randCrossValidation(self.folds, X.nnz)
    numFolds = len(cvInds)
    numKs = self.ks.shape[0]
    localAucs = numpy.zeros((numKs, self.lmbdas.shape[0], numFolds))

    logging.debug("Performing model selection")
    paramList = []

    for icv, (trainInds, testInds) in enumerate(cvInds):
        Util.printIteration(icv, 1, self.folds, "Fold: ")

        trainX = SparseUtils.submatrix(X, trainInds)
        testX = SparseUtils.submatrix(X, testInds)
        testOmegaList = SparseUtils.getOmegaList(testX)

        for k in self.ks:
            maxLocalAuc = self.copy()
            maxLocalAuc.k = k
            paramList.append((trainX, testX, testOmegaList, maxLocalAuc))

    pool = multiprocessing.Pool(processes=self.numProcesses, maxtasksperchild=100)
    try:
        resultsIterator = pool.imap(localAucsLmbdas, paramList, self.chunkSize)

        # imap preserves submission order: fold-major, then k
        for icv in range(numFolds):
            for i in range(numKs):
                localAucs[i, :, icv] = next(resultsIterator)
    finally:
        # Workers must not outlive this call even when a task raises
        pool.terminate()

    meanLocalAucs = numpy.mean(localAucs, 2)
    stdLocalAucs = numpy.std(localAucs, 2)

    logging.debug(meanLocalAucs)

    bestKInd, bestLmbdaInd = numpy.unravel_index(numpy.argmax(meanLocalAucs), meanLocalAucs.shape)
    k = self.ks[bestKInd]
    lmbda = self.lmbdas[bestLmbdaInd]

    logging.debug("Model parameters: k=" + str(k) + " lmbda=" + str(lmbda))

    self.k = k
    self.lmbda = lmbda

    return meanLocalAucs, stdLocalAucs
示例9: supervisedMC22
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
def supervisedMC22(lists, itemList, topQList, verbose=False):
    """
    A supervised variant of MC2. A quadratic program finds non-negative
    weights, summing to one, for the per-list transition matrices so that
    their combination fits the transition matrix generated from topQList in
    the least squares sense. The items are then ranked from the weighted
    combination.

    :param lists: A list of lists. Each sublist is an ordered set of a subset of the items from itemList

    :param itemList: A list of all possible items

    :param topQList: The list used to generate the target transition matrix

    :param verbose: If True the per-list transition matrices are returned as well

    :return: (outputList, scores), followed by PList when verbose is True
    """
    import cvxopt
    import cvxopt.solvers

    numLists = len(lists)
    numItems = len(itemList)
    outputList, scores, PList = RankAggregator.MC2(lists, itemList, verbose=True)

    # Column i of Q is the flattened transition matrix of list i
    Q = cvxopt.spmatrix([], [], [], (numItems*numItems, numLists))
    for col, Pcol in enumerate(PList):
        flattened = numpy.array(Pcol.todense()).ravel()
        Q[:, col] = cvxopt.matrix(flattened)
    QQ = Q.T * Q

    Py = RankAggregator.generateTransitionMatrix(topQList, itemList)
    target = numpy.array(Py.todense()).ravel()
    target = cvxopt.matrix(target)

    # Inequality -alpha <= 0 and equality sum(alpha) = 1
    G = cvxopt.spdiag((-numpy.ones(numLists)).tolist())
    h = cvxopt.matrix(numpy.zeros(numLists))
    A = cvxopt.matrix(numpy.ones(numLists), (1, numLists))
    b = cvxopt.matrix(numpy.ones(1))
    q = -Q.T * target

    solution = cvxopt.solvers.qp(QQ, q, G, h, A, b)
    alpha = numpy.array(solution['x'])

    # Weighted sum of the dense transition matrices
    P = numpy.zeros((numItems, numItems))
    for j, Pj in enumerate(PList):
        Util.printIteration(j, 1, numLists)
        P += alpha[j] * numpy.array(Pj.todense())
    P /= numLists

    outputList, scores = RankAggregator.computeOutputList(P, itemList)

    if not verbose:
        return outputList, scores
    return outputList, scores, PList
示例10: modelSelect
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
def modelSelect(self, X):
    """
    Perform model selection on X and store the best value of k.

    The nonzero entries of X are split into cross validation folds. For every
    fold and every k in self.ks a copy of this learner is scored with
    computePrecision, in this process and one task at a time. The k with the
    largest mean score over the folds is written to self.k.

    :param X: The matrix to select on; its nnz attribute gives the number of entries to split.

    :return: (meanPrecisions, stdPrecisions), the mean and standard deviation over folds for each k.
    """
    cvInds = Sampling.randCrossValidation(self.folds, X.nnz)
    numFolds = len(cvInds)
    precisions = numpy.zeros((self.ks.shape[0], numFolds))

    logging.debug("Performing model selection")
    paramList = []

    for icv, (trainInds, testInds) in enumerate(cvInds):
        Util.printIteration(icv, 1, self.folds, "Fold: ")

        trainX = SparseUtils.submatrix(X, trainInds)
        testX = SparseUtils.submatrix(X, testInds)
        testOmegaList = SparseUtils.getOmegaList(testX)

        for k in self.ks:
            learner = self.copy()
            learner.k = k
            paramList.append((trainX, testX, testOmegaList, learner))

    # Lazy, in-order evaluation of every (fold, k) task
    resultsIterator = (computePrecision(params) for params in paramList)

    # Tasks were queued fold-major, then k, so results are read back the same way
    for icv in range(numFolds):
        for i in range(len(self.ks)):
            precisions[i, icv] = next(resultsIterator)

    meanPrecisions = numpy.mean(precisions, 1)
    stdPrecisions = numpy.std(precisions, 1)

    logging.debug(meanPrecisions)

    k = self.ks[numpy.argmax(meanPrecisions)]
    logging.debug("Model parameters: k=" + str(k))
    self.k = k

    return meanPrecisions, stdPrecisions
示例11: evaluateCvOuter
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
def evaluateCvOuter(self, X, Y, folds, leafRank):
    """
    Run stratified cross validation with the given leaf rank, training and
    scoring this learner on each fold, and collect AUC values and ROC curves.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param Y: A 1D vector of labels
    :type Y: :class:`ndarray`

    :param folds: The number of cross validation folds
    :type folds: :class:`int`

    :param leafRank: The value passed to self.setLeafRank before training

    :return: The tuple (bestParams, allMetrics, bestMetaDicts). allMetrics is [train AUCs, train ROCs, test AUCs, test ROCs]; bestParams is returned empty and bestMetaDicts holds one empty dict per fold.
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(Y, numpy.ndarray)
    Parameter.checkInt(folds, 2, float('inf'))
    if Y.ndim != 1:
        raise ValueError("Expecting Y to be 1D")

    indexList = cross_val.StratifiedKFold(Y, folds)
    self.setLeafRank(leafRank)

    bestParams = []
    bestMetaDicts = []
    bestTrainROCs, bestTestROCs = [], []
    bestTrainAUCs = numpy.zeros(folds)
    bestTestAUCs = numpy.zeros(folds)

    for fold, (trainInds, testInds) in enumerate(indexList):
        Util.printIteration(fold, 1, folds)
        trainX, trainY = X[trainInds, :], Y[trainInds]
        testX, testY = X[testInds, :], Y[testInds]

        logging.debug("Distribution of labels in train: " + str(numpy.bincount(trainY)))
        logging.debug("Distribution of labels in test: " + str(numpy.bincount(testY)))

        self.learnModel(trainX, trainY)
        predTrainY = self.predict(trainX)
        predTestY = self.predict(testX)

        bestTrainAUCs[fold] = Evaluator.auc(predTrainY, trainY)
        bestTestAUCs[fold] = Evaluator.auc(predTestY, testY)

        # Keep the ROC curves and a (currently empty) metadata dict per fold
        bestTrainROCs.append(Evaluator.roc(trainY, predTrainY))
        bestTestROCs.append(Evaluator.roc(testY, predTestY))
        bestMetaDicts.append({})

    logging.debug("Mean test AUC = " + str(numpy.mean(bestTestAUCs)))
    logging.debug("Std test AUC = " + str(numpy.std(bestTestAUCs)))

    allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]
    return (bestParams, allMetrics, bestMetaDicts)
示例12: generate_data_file
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
def generate_data_file(dir, nb_user=None):
    """
    Make sure the data files needed to read nb_user users exist in dir,
    generating the missing ones.

    The file for BemolData.nb_max_user() is created first, if absent, by
    copying the file returned by BemolData.get_file_name(dir, None). Then
    every missing file, from the one for nb_user upwards, is produced by
    filtering the lines of the next existing file: a line is kept when the
    index that a fresh MyDictionary gives to its first integer field is
    below the target number of users.

    :param dir: The directory holding the data files

    :param nb_user: The number of users wanted; None means BemolData.nb_max_user()

    :raises RGIOError: if an IOError names the file being read from
    """
    logging.debug("nb_user: " + str(nb_user))
    BemolData.assert_nb_user(nb_user)
    if nb_user is None:
        nb_user = BemolData.nb_max_user()

    # generate the file containing all the dataset
    # NOTE: check-then-act on the file system (TOCTTOU)
    f_data_name = BemolData.get_file_name(dir, BemolData.nb_max_user())
    if not os.path.exists(f_data_name):
        logging.info("creating file " + str(f_data_name))
        shutil.copy(BemolData.get_file_name(dir, None), f_data_name)

    # other files to generate
    nb_user_to_generate = []
    current_nb_user = BemolData.get_nb_user_to_read(nb_user)
    logging.debug("current_nb_user before while: " + str(current_nb_user))
    # NOTE: check-then-act on the file system (TOCTTOU)
    while not os.path.exists(BemolData.get_file_name(dir, current_nb_user)):
        logging.debug("current_nb_user in while: " + str(current_nb_user))
        nb_user_to_generate.append(current_nb_user)
        current_nb_user = BemolData.get_nb_user_to_read(current_nb_user+1)
    # Largest first, so that each file's source already exists
    nb_user_to_generate.reverse()

    line_pattern = re.compile(r"(\d+)\s(\d+)\s(\d+)\s(\d+)")

    # generate other files
    for current_nb_user in nb_user_to_generate:
        f_existing_data_name = BemolData.get_file_name(dir, current_nb_user+1)
        f_to_create_data_name = BemolData.get_file_name(dir, current_nb_user)
        logging.info("creating file " + f_to_create_data_name)
        dict_user = MyDictionary()
        try:
            # Both archives are closed (and the output flushed) before the
            # next iteration may read the file written here
            with gzip.open(f_existing_data_name, 'rb') as f_existing_data:
                with gzip.open(f_to_create_data_name, 'wb') as f_to_create_data:
                    i_max = BemolData.get_nb_line(f_existing_data_name)
                    for i, line in enumerate(f_existing_data):
                        Util.printIteration(i, 1000, i_max)
                        m = line_pattern.match(line)
                        if dict_user.index(int(m.group(1))) < current_nb_user:
                            f_to_create_data.write(line)
        except IOError as error:
            # error.filename is a path, so it is compared with the source
            # file's name rather than with the file object
            if error.filename == f_existing_data_name:
                raise RGIOError(error, RGIOError.indent() + 'it disappeared in the meanwhile')
            else:
                raise
示例13: learnModel
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
def learnModel(self, graph):
    """
    Learn a prediction model based on all of the edges of the input graph.
    For each ego with at least one neighbour, X contains the features of its
    neighbours followed by those of randomly chosen non-neighbours (as many
    as there are neighbours, when enough exist), with y = 1 for a neighbour
    and -1 otherwise. The alter regressor is trained on (X, y) to give a
    weight vector w for that ego, and the ego regressor is then trained to
    map ego features onto these weight vectors.

    The graph is stored in self.graph.

    :param graph: The input graph to learn from.
    :type graph: class:`apgl.graph.AbstractSingleGraph`
    """
    Parameter.checkInt(self.windowSize, 1, graph.getNumVertices())
    self.graph = graph
    logging.info("Learning model on graph of size " + str(graph.getNumVertices()))

    numVertices = graph.getNumVertices()
    numFeatures = graph.getVertexList().getNumFeatures()
    allIndices = numpy.arange(0, numVertices)
    V = graph.getVertexList().getVertices(allIndices)
    printStep = numpy.floor(numVertices/10)

    # Rows are gathered in lists and stacked once after the loop; growing the
    # arrays with numpy.r_ on every iteration copied them each time.
    weightRows = []
    egoRows = []

    for i in range(numVertices):
        Util.printIteration(i, printStep, numVertices)
        neighbours = graph.neighbours(i)

        if neighbours.shape[0] == 0:
            continue

        # Sample non-neighbours without replacement
        compNeighbours = numpy.setdiff1d(allIndices, neighbours)
        perm = numpy.random.permutation(compNeighbours.shape[0])[0:neighbours.shape[0]]
        negativeVertices = V[compNeighbours[perm], :]

        X = numpy.r_[V[neighbours, :], negativeVertices]
        y = numpy.ones(X.shape[0])
        y[neighbours.shape[0]:] = -1

        weightRows.append(self.alterRegressor.learnModel(X, y))
        egoRows.append(V[i, :])

    # Starting from the empty float arrays keeps the shape and dtype that
    # repeated appending produced, including when no ego has neighbours
    W = numpy.zeros((0, numFeatures))
    Xe = numpy.zeros((0, numFeatures))
    if weightRows:
        W = numpy.r_[W, numpy.array(weightRows)]
        Xe = numpy.r_[Xe, numpy.array(egoRows)]

    #Now we need to solve least to find regressor of Xe onto W
    self.egoRegressor.learnModel(Xe, W)
示例14: coauthorsGraphFromAuthors
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
def coauthorsGraphFromAuthors(self, relevantExperts):
    """
    Take a set of relevant authors and return their coauthorship graph.

    Every relevant expert is first registered with an IdIndexer. The data
    file is then scanned for author lines (marker ``#@``), and for each pair
    of authors on a line that are both relevant experts an edge is recorded.
    Repeated edges are merged by summing their weights.

    :param relevantExperts: The collection of author names to keep

    :return: (graph, authorIndexer), where graph is an undirected igraph.Graph with edge attributes "weight" (number of coauthored lines) and "invWeight" (its reciprocal), and authorIndexer maps author names to vertex indices.
    """
    authorIndexer = IdIndexer()
    author1Inds = array.array("i")
    author2Inds = array.array("i")

    for relevantExpert in relevantExperts:
        authorIndexer.append(relevantExpert)

    # Compiled once instead of on every line
    authorsPattern = re.compile("#@(.*)")

    # The with block closes the data file, which was previously left open
    with open(self.dataFilename) as dataFile:
        for i, line in enumerate(dataFile):
            Util.printIteration(i, self.stepSize, self.numLines)
            authors = authorsPattern.findall(line)

            if not authors:
                continue

            authors = set([x.strip() for x in authors[0].split(",")])
            # Cheap skip of lines with no relevant author at all
            if len(authors.intersection(relevantExperts)) == 0:
                continue

            for author1, author2 in itertools.combinations(authors, 2):
                if author1 in relevantExperts and author2 in relevantExperts:
                    author1Inds.append(authorIndexer.append(author1))
                    author2Inds.append(authorIndexer.append(author2))

    logging.debug("Found " + str(len(authorIndexer.getIdDict())) + " coauthors")

    #Coauthor graph is undirected
    # The builtin int replaces the numpy.int alias, which newer NumPy removed
    author1Inds = numpy.array(author1Inds, int)
    author2Inds = numpy.array(author2Inds, int)
    edges = numpy.c_[author1Inds, author2Inds]

    graph = igraph.Graph()
    graph.add_vertices(len(authorIndexer.getIdDict()))
    graph.add_edges(edges)
    graph.es["weight"] = numpy.ones(graph.ecount())
    graph.simplify(combine_edges=sum)
    graph.es["invWeight"] = 1.0/(numpy.array(graph.es["weight"]))

    return graph, authorIndexer
示例15: MC2
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
def MC2(lists, itemList, alpha=None, verbose=False):
    """
    Perform weighted rank aggregation using MC2 as given in Rank Aggregation Methods
    for the Web, Dwork et al. The weighting vector is given by alpha.

    :param lists: A list of lists. Each sublist is an ordered set of a subset of the items from itemList

    :param itemList: A list of all possible items

    :param alpha: A vector of weights for the transition matrices; None gives every list the weight 1/len(lists)

    :param verbose: If True the per-list transition matrices are returned as well

    :return: (outputList, scores), followed by PList when verbose is True
    """
    n = len(itemList)
    ell = len(lists)

    # An identity test is required here: comparing an array with == is
    # elementwise and cannot be used as the condition of an if statement
    if alpha is None:
        alpha = numpy.ones(ell)/ell

    P = scipy.sparse.csr_matrix((n, n))
    PList = []

    logging.debug("Computing permutation matrices")
    for j, lst in enumerate(lists):
        Util.printIteration(j, 1, ell)
        Pj = RankAggregator.generateTransitionMatrix(lst, itemList)
        P = P + alpha[j] * Pj
        PList.append(Pj)

    P /= ell
    logging.debug("Done")

    outputList,scores = RankAggregator.computeOutputList(P, itemList)

    if verbose:
        return outputList, scores, PList
    else:
        return outputList, scores