Python Util.printIteration方法代码示例

本文整理汇总了Python中sandbox.util.Util.Util.printIteration方法的典型用法代码示例。如果您正苦于以下问题：Python Util.printIteration方法的具体用法？Python Util.printIteration怎么用？Python Util.printIteration使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sandbox.util.Util.Util的用法示例。

在下文中一共展示了Util.printIteration方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: cleanXML

# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
 def cleanXML(self):
     """
     Write a cleaned copy of the XML file to self.xmlCleanFilename.

     Each line of self.xmlFileName is HTML-unescaped, every "&" is then
     re-escaped as "&amp;", and any <title> or <ee> element whose text
     contains "<" or ">" is replaced by a default placeholder. Nothing is
     done if the cleaned file already exists.
     """
     if os.path.exists(self.xmlCleanFilename):
         logging.debug("File already generated: " + self.xmlCleanFilename)
         return

     logging.debug("Cleaning XML")
     h = HTMLParser.HTMLParser()

     # The context managers close both files even if a line fails to process
     with open(self.xmlFileName) as inFile, open(self.xmlCleanFilename, "w") as outFile:
         for i, line in enumerate(inFile):
             Util.printIteration(i, self.stepSize, self.numLines)
             outLine = h.unescape(line).replace("&", "&amp;")
             # Stray angle brackets inside these elements would break the XML
             outLine = re.sub(r"<title>.*[<>].*</title>", "<title>Default Title</title>", outLine)
             outLine = re.sub(r"<ee>.*[<>].*</ee>", "<ee>Default text</ee>", outLine)
             outFile.write(outLine)

     logging.debug("All done")

开发者ID:charanpald，项目名称:wallhack，代码行数:28，代码来源:DBLPDataset.py

示例2: __updateEigenSystem

# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
    def __updateEigenSystem(self, lmbda, Q, deltaW, W):
        """
        Apply the weight changes in deltaW to the eigensystem (lmbda, Q).

        Every nonzero entry (i, j) of deltaW with i < j is handled in turn: the
        eigensystem is updated through self.incrementEigenSystem and the change
        is then added to both W[i, j] and W[j, i], so W is modified in place.
        Entries with i >= j are skipped.

        :return: the updated eigenvalues and eigenvectors
        """
        nonzeroInds = deltaW.nonzero()
        rowInds = nonzeroInds[0]
        colInds = nonzeroInds[1]
        numChanges = rowInds.shape[0]

        for step in range(numChanges):
            Util.printIteration(step, 10, numChanges)
            i, j = rowInds[step], colInds[step]

            if i < j:
                weightChange = deltaW[i, j]
                assert weightChange != 0

                # W must reflect all previous changes when the next one is applied
                lmbda, Q = self.incrementEigenSystem(lmbda, Q, W, i, j, weightChange)
                W[i, j] += weightChange
                W[j, i] += weightChange

        return lmbda, Q

开发者ID:charanpald，项目名称:sandbox，代码行数:28，代码来源:NingSpectralClustering.py

示例3: predictEdges

# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
    def predictEdges(self, vertexIndices):
        r"""
        Rank candidate neighbours for each query vertex using common neighbours.

        For a query vertex x and a candidate y the score is
        \sum_{z \in n(x) \cap n(y)} 1/\log|n(z)|, where n(v) is the set of
        vertices with a nonzero weight in row v of the weight matrix. Common
        neighbours for which log|n(z)| is zero are skipped.

        :param vertexIndices: array of query vertex indices

        :return: (P, S), each with one row per query vertex and
            self.windowSize columns, filled from self.indicesFromScores
        """
        numVertices = self.graph.getNumVertices()
        Parameter.checkInt(self.windowSize, 1, numVertices)
        logging.info("Running predictEdges in " + str(self.__class__.__name__))

        numQueries = vertexIndices.shape[0]
        P = numpy.zeros((numQueries, self.windowSize))
        S = numpy.zeros((numQueries, self.windowSize))
        W = self.graph.getWeightMatrix()

        for i in range(numQueries):
            Util.printIteration(i, self.printStep, numQueries)
            scores = numpy.zeros(numVertices)
            # The query row does not change while scanning the candidates
            queryRow = W[vertexIndices[i], :]

            for j in range(numVertices):
                commonNeighbours = numpy.nonzero(queryRow * W[j, :])[0]

                for k in commonNeighbours:
                    q = numpy.log(numpy.nonzero(W[k, :])[0].shape[0])
                    # A single-neighbour vertex gives log(1) = 0; avoid dividing by it
                    if q != 0:
                        scores[j] += 1/q

            P[i, :], S[i, :] = self.indicesFromScores(vertexIndices[i], scores)

        return P, S

开发者ID:charanpald，项目名称:sandbox，代码行数:33，代码来源:AdamicPredictor.py

示例4: readAuthorsAndDocuments

# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
    def readAuthorsAndDocuments(self, useAbstract=True): 
        """
        Parse self.dataFilename into parallel per-article lists.

        Lines are matched against the markers "#*" (title), "#@" (comma
        separated authors), "#!" (abstract) and "#citation" (citation count).
        A line consisting only of a newline ends the current article, at which
        point its document text, authors and citation count are appended to
        the output lists and the running values are reset.

        :param useAbstract: if True the document text is the title followed by
            a space and the abstract, otherwise just the title

        :return: (authorList, documentList, citationList)
        """
        logging.debug("About to read file " + self.dataFilename)
        authorList = []
        citationList = []
        documentList = []

        lastAbstract = ""
        lastTitle = ""
        lastAuthors = []
        lastCitationNo = 0

        # The context manager closes the file even if parsing a line raises
        with open(self.dataFilename) as inFile:
            for i, line in enumerate(inFile):
                Util.printIteration(i, self.stepSize, self.numLines)

                if line == "\n":
                    if useAbstract:
                        document = lastTitle + " " + lastAbstract
                    else:
                        document = lastTitle
                    documentList.append(document)
                    authorList.append(lastAuthors)
                    citationList.append(lastCitationNo)

                    lastAbstract = ""
                    lastTitle = ""
                    lastAuthors = []
                    lastCitationNo = 0
                    # None of the field markers can match an empty line
                    continue

                #Match the fields in the file
                title = re.findall(r"#\*(.*)", line)
                currentAuthors = re.findall(r"#@(.*)", line)
                abstract = re.findall(r"#!(.*)", line)
                citationNo = re.findall(r"#citation(.*)", line)

                if title and title[0]:
                    lastTitle = title[0]

                if abstract and abstract[0]:
                    lastAbstract = abstract[0]

                if citationNo and citationNo[0]:
                    lastCitationNo = int(citationNo[0])

                if currentAuthors:
                    authorNames = set(name.strip() for name in currentAuthors[0].split(","))
                    # Trailing or doubled commas produce empty names
                    authorNames.discard("")
                    lastAuthors = authorNames

        logging.debug("Finished reading " + str(len(documentList)) + " articles")

        return authorList, documentList, citationList

开发者ID:charanpald，项目名称:wallhack，代码行数:62，代码来源:ArnetMinerDataset.py

示例5: evaluateCvOuter

# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
    def evaluateCvOuter(self, X, Y, folds):
        """
        Run stratified cross validation and collect AUC values and ROC curves.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :param Y: A 1D vector of labels
        :type Y: :class:`ndarray`

        :param folds: The number of cross validation folds
        :type folds: :class:`int`

        :return: A tuple (bestParams, allMetrics, bestMetaDicts). allMetrics is
            [train AUCs, train ROCs, test AUCs, test ROCs]; bestParams is
            returned empty and bestMetaDicts holds one empty dict per fold.
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkInt(folds, 2, float('inf'))
        if Y.ndim != 1:
            raise ValueError("Expecting Y to be 1D")

        splits = cross_val.StratifiedKFold(Y, folds)

        bestParams = []
        trainAUCs = numpy.zeros(folds)
        testAUCs = numpy.zeros(folds)
        trainROCs = []
        testROCs = []
        metaDicts = []

        for fold, (trainInds, testInds) in enumerate(splits):
            Util.printIteration(fold, 1, folds, "Outer CV: ")
            trainX, testX = X[trainInds, :], X[testInds, :]
            trainY, testY = Y[trainInds], Y[testInds]

            self.learnModel(trainX, trainY)

            predTrainY = self.predict(trainX)
            predTestY = self.predict(testX)
            trainAUCs[fold] = Evaluator.auc(predTrainY, trainY)
            testAUCs[fold] = Evaluator.auc(predTestY, testY)

            # Keep the ROC curve of every fold alongside its AUC
            trainROCs.append(Evaluator.roc(trainY, predTrainY))
            testROCs.append(Evaluator.roc(testY, predTestY))

            metaDicts.append({})

        logging.debug("Mean test AUC = " + str(numpy.mean(testAUCs)))
        logging.debug("Std test AUC = " + str(numpy.std(testAUCs)))

        return (bestParams, [trainAUCs, trainROCs, testAUCs, testROCs], metaDicts)

开发者ID:charanpald，项目名称:sandbox，代码行数:58，代码来源:AbstractTreeRank.py

示例6: supervisedMC23

# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
    def supervisedMC23(lists, itemList, topQList, verbose=False): 
        """
        A supervised version of MC2 of our own invention. The idea is to find a
        linear combination of the per-list transition matrices that fits the
        transition matrix generated from topQList.

        The combination weights come from a quadratic program whose objective
        is built from the flattened transition matrices (Q) and the flattened
        target matrix (s), with the weights constrained to be non-negative and
        to sum to one.

        NOTE(review): earlier code here computed the leading eigenvector of the
        target matrix but never used it, and filled Q without creating it, so
        the function always raised NameError. Q is now created the same way as
        in supervisedMC22; fitting the stationary distribution is still not
        implemented.

        :param lists: A list of lists, passed on to RankAggregator.MC2

        :param itemList: A list of all possible items

        :param topQList: The list used to generate the target transition matrix

        :param verbose: If True the per-list transition matrices are returned too

        :return: (outputList, scores), plus PList when verbose is True
        """
        import cvxopt
        import cvxopt.solvers
        ell = len(lists)
        n = len(itemList)
        outputList, scores, PList = RankAggregator.MC2(lists, itemList, verbose=True)

        # One column per flattened transition matrix
        Q = cvxopt.spmatrix([], [], [], (n*n, ell))

        for i, P in enumerate(PList): 
            Q[:, i] = cvxopt.matrix(numpy.array(P.todense()).ravel()) 

        QQ = Q.T * Q

        Py = RankAggregator.generateTransitionMatrix(topQList, itemList)
        s = numpy.array(Py.todense()).ravel()
        s = cvxopt.matrix(s)

        # G x <= h keeps every weight non-negative
        G = cvxopt.spdiag((-numpy.ones(ell)).tolist())
        h = cvxopt.matrix(numpy.zeros(ell))

        # A x = b makes the weights sum to one
        A = cvxopt.matrix(numpy.ones(ell), (1, ell))
        b = cvxopt.matrix(numpy.ones(1))

        q = -Q.T * s

        sol = cvxopt.solvers.qp(QQ, q, G, h, A, b)

        alpha = numpy.array(sol['x'])

        #Combine the matrices 
        P = numpy.zeros((n, n))

        for j, Pj in enumerate(PList): 
            Util.printIteration(j, 1, ell)
            P += alpha[j] * numpy.array(Pj.todense()) 

        P /= ell 

        outputList, scores = RankAggregator.computeOutputList(P, itemList)

        if verbose: 
            return outputList, scores, PList
        else: 
            return outputList, scores

开发者ID:charanpald，项目名称:wallhack，代码行数:57，代码来源:RankAggregator.py

示例7: learnModel

# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
    def learnModel(self, graph):
        """
        Learn a prediction model by treating each ego network independently.

        For every vertex with at least one neighbour, the neighbours' feature
        rows form X and the corresponding edge values form y; the alter
        regressor is trained on (X, y) and its weight vector is stacked into W.
        The ego regressor is then trained to map the ego feature rows onto W.

        :param graph: The input graph to learn from.
        :type graph: class:`apgl.graph.AbstractSingleGraph`

        :return: The matrix W with one row of alter weights per processed ego.
        """
        numVertices = graph.getNumVertices()
        logging.info("Learning model on graph of size " + str(numVertices))
        logging.info("EgoLearner: " + str(self.egoRegressor))
        logging.info("AlterLearner: " + str(self.alterRegressor))

        vertexList = graph.getVertexList()
        numFeatures = vertexList.getNumFeatures()
        V = vertexList.getVertices(list(numpy.arange(0, numVertices)))
        W = numpy.zeros((0, numFeatures))
        Xe = numpy.zeros((0, numFeatures))
        printStep = numpy.floor(numVertices/10)

        for ego in range(numVertices):
            Util.printIteration(ego, printStep, numVertices)
            neighbours = graph.neighbours(ego)

            # Isolated vertices contribute nothing to the regression
            if neighbours.shape[0] == 0:
                continue

            X = V[neighbours, :]
            y = numpy.ones(X.shape[0])
            for ind, neighbour in enumerate(neighbours):
                y[ind] = graph.getEdge(ego, neighbour)

            w = self.alterRegressor.learnModel(X, y)

            W = numpy.r_[W, numpy.array([w])]
            Xe = numpy.r_[Xe, numpy.array([V[ego, :]])]

        #Now regress the ego features Xe onto the alter weights W
        logging.info("Finding regression matrix onto weights using matrix of size " + str(Xe.shape))
        gc.collect()
        self.egoRegressor.learnModel(Xe, W)

        return W

开发者ID:charanpald，项目名称:sandbox，代码行数:54，代码来源:EgoEdgeLabelPredictor.py

示例8: modelSelect

# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
    def modelSelect(self, X): 
        """
        Choose k and lmbda by cross validation and store them on self.

        For each fold and each k in self.ks a copy of this learner is queued
        for evaluation by localAucsLmbdas in a process pool; each result fills
        one row of local AUCs over self.lmbdas. The (k, lmbda) pair with the
        highest mean over folds is assigned to self.k and self.lmbda.

        :param X: sparse matrix; its nonzero entries are split into folds

        :return: (meanLocalAucs, stdLocalAucs), both indexed by [k, lmbda]
        """
        cvInds = Sampling.randCrossValidation(self.folds, X.nnz)
        numKs = self.ks.shape[0]
        localAucs = numpy.zeros((numKs, self.lmbdas.shape[0], len(cvInds)))

        logging.debug("Performing model selection")
        paramList = []

        for icv, (trainInds, testInds) in enumerate(cvInds):
            Util.printIteration(icv, 1, self.folds, "Fold: ")

            trainX = SparseUtils.submatrix(X, trainInds)
            testX = SparseUtils.submatrix(X, testInds)

            testOmegaList = SparseUtils.getOmegaList(testX)

            for k in self.ks:
                maxLocalAuc = self.copy()
                maxLocalAuc.k = k
                paramList.append((trainX, testX, testOmegaList, maxLocalAuc))

        pool = multiprocessing.Pool(processes=self.numProcesses, maxtasksperchild=100)
        try:
            resultsIterator = pool.imap(localAucsLmbdas, paramList, self.chunkSize)

            # Results arrive in the order paramList was built: fold-major, then k
            for icv in range(len(cvInds)):
                for i in range(numKs):
                    localAucs[i, :, icv] = next(resultsIterator)
        finally:
            # Always release the worker processes, even if a task raised
            pool.terminate()

        meanLocalAucs = numpy.mean(localAucs, 2)
        stdLocalAucs = numpy.std(localAucs, 2)

        logging.debug(meanLocalAucs)

        bestInds = numpy.unravel_index(numpy.argmax(meanLocalAucs), meanLocalAucs.shape)
        k = self.ks[bestInds[0]]
        lmbda = self.lmbdas[bestInds[1]]

        logging.debug("Model parameters: k=" + str(k) + " lmbda=" + str(lmbda))

        self.k = k
        self.lmbda = lmbda

        return meanLocalAucs, stdLocalAucs

开发者ID:charanpald，项目名称:sandbox，代码行数:52，代码来源:WarpMf.py

示例9: supervisedMC22

# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
    def supervisedMC22(lists, itemList, topQList, verbose=False): 
        """
        A supervised variant of MC2: find a linear combination of the per-list
        transition matrices that fits the transition matrix generated from
        topQList, then rank the items with the combined matrix.

        :param lists: A list of lists, passed on to RankAggregator.MC2

        :param itemList: A list of all possible items

        :param topQList: The list used to generate the target transition matrix

        :param verbose: If True the per-list transition matrices are returned too

        :return: (outputList, scores), plus PList when verbose is True
        """
        import cvxopt
        import cvxopt.solvers
        numLists = len(lists)
        numItems = len(itemList)
        outputList, scores, PList = RankAggregator.MC2(lists, itemList, verbose=True)

        # Column `col` of Q is the flattened transition matrix of list `col`
        Q = cvxopt.spmatrix([], [], [], (numItems*numItems, len(lists)))
        for col, transition in enumerate(PList):
            flattened = numpy.array(transition.todense()).ravel()
            Q[:, col] = cvxopt.matrix(flattened)

        QQ = Q.T * Q

        # Flattened target matrix
        Py = RankAggregator.generateTransitionMatrix(topQList, itemList)
        s = cvxopt.matrix(numpy.array(Py.todense()).ravel())

        # Inequality and equality constraints handed to the QP solver
        G = cvxopt.spdiag((-numpy.ones(numLists)).tolist())
        h = cvxopt.matrix(numpy.zeros(numLists))
        A = cvxopt.matrix(numpy.ones(numLists), (1, numLists))
        b = cvxopt.matrix(numpy.ones(1))

        q = -Q.T * s

        solution = cvxopt.solvers.qp(QQ, q, G, h, A, b)
        alpha = numpy.array(solution['x'])

        # Weighted sum of the dense transition matrices
        combined = numpy.zeros((numItems, numItems))
        for j, Pj in enumerate(PList):
            Util.printIteration(j, 1, numLists)
            combined += alpha[j] * numpy.array(Pj.todense())

        combined /= numLists

        outputList, scores = RankAggregator.computeOutputList(combined, itemList)

        if not verbose:
            return outputList, scores
        return outputList, scores, PList

开发者ID:charanpald，项目名称:wallhack，代码行数:52，代码来源:RankAggregator.py

示例10: modelSelect

# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
    def modelSelect(self, X): 
        """
        Choose k by cross validation and store it on self.

        For each fold and each k in self.ks a copy of this learner is evaluated
        with computePrecision; the k with the highest mean precision over the
        folds is assigned to self.k.

        :param X: sparse matrix; its nonzero entries are split into folds

        :return: (meanPrecisions, stdPrecisions), one entry per k in self.ks
        """
        cvInds = Sampling.randCrossValidation(self.folds, X.nnz)
        numKs = self.ks.shape[0]
        precisions = numpy.zeros((numKs, len(cvInds)))

        logging.debug("Performing model selection")
        paramList = []

        for icv, (trainInds, testInds) in enumerate(cvInds):
            Util.printIteration(icv, 1, self.folds, "Fold: ")

            trainX = SparseUtils.submatrix(X, trainInds)
            testX = SparseUtils.submatrix(X, testInds)

            testOmegaList = SparseUtils.getOmegaList(testX)

            for k in self.ks:
                learner = self.copy()
                learner.k = k
                paramList.append((trainX, testX, testOmegaList, learner))

        # paramList is ordered fold-major, then k, so the flat position maps
        # back to (fold, k index); evaluation is done serially in this process
        for position, params in enumerate(paramList):
            icv, i = divmod(position, numKs)
            precisions[i, icv] = computePrecision(params)

        meanPrecisions = numpy.mean(precisions, 1)
        stdPrecisions = numpy.std(precisions, 1)

        logging.debug(meanPrecisions)

        k = self.ks[numpy.argmax(meanPrecisions)]

        logging.debug("Model parameters: k=" + str(k)) 

        self.k = k

        return meanPrecisions, stdPrecisions

开发者ID:charanpald，项目名称:sandbox，代码行数:51，代码来源:KNNRecommender.py

示例11: evaluateCvOuter

# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
    def evaluateCvOuter(self, X, Y, folds, leafRank):
        """
        Run stratified cross validation with the given leaf rank and collect
        AUC values and ROC curves. Y must be a 1D array.

        :param X: A matrix with examples as rows

        :param Y: A 1D vector of labels

        :param folds: The number of cross validation folds

        :param leafRank: Passed to self.setLeafRank before training

        :return: A tuple (bestParams, allMetrics, bestMetaDicts). allMetrics is
            [train AUCs, train ROCs, test AUCs, test ROCs]; bestParams is
            returned empty and bestMetaDicts holds one empty dict per fold.
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkInt(folds, 2, float('inf'))
        if Y.ndim != 1:
            raise ValueError("Expecting Y to be 1D")

        indexList = cross_val.StratifiedKFold(Y, folds)
        self.setLeafRank(leafRank)

        bestParams = []
        bestMetaDicts = []
        trainAUCs, testAUCs = numpy.zeros(folds), numpy.zeros(folds)
        trainROCs, testROCs = [], []

        for foldInd, (trainInds, testInds) in enumerate(indexList):
            Util.printIteration(foldInd, 1, folds)
            trainX, trainY = X[trainInds, :], Y[trainInds]
            testX, testY = X[testInds, :], Y[testInds]

            logging.debug("Distribution of labels in train: " + str(numpy.bincount(trainY)))
            logging.debug("Distribution of labels in test: " + str(numpy.bincount(testY)))

            self.learnModel(trainX, trainY)
            predTrainY = self.predict(trainX)
            predTestY = self.predict(testX)
            trainAUCs[foldInd] = Evaluator.auc(predTrainY, trainY)
            testAUCs[foldInd] = Evaluator.auc(predTestY, testY)

            # Keep the ROC curve of every fold alongside its AUC
            trainROCs.append(Evaluator.roc(trainY, predTrainY))
            testROCs.append(Evaluator.roc(testY, predTestY))

            bestMetaDicts.append({})

        logging.debug("Mean test AUC = " + str(numpy.mean(testAUCs)))
        logging.debug("Std test AUC = " + str(numpy.std(testAUCs)))
        allMetrics = [trainAUCs, trainROCs, testAUCs, testROCs]

        return (bestParams, allMetrics, bestMetaDicts)

开发者ID:charanpald，项目名称:sandbox，代码行数:51，代码来源:TreeRankForestR.py

示例12: generate_data_file

# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
    def generate_data_file(dir, nb_user=None):
        """
        Create the gzipped data files needed for a dataset of nb_user users.

        The full dataset file is copied into place if it is missing. Then every
        missing file in the chain returned by BemolData.get_nb_user_to_read is
        generated from the next larger one, keeping only the lines whose first
        field maps to a user index below the target number of users.

        :param dir: directory holding the data files

        :param nb_user: number of users wanted; None means
            BemolData.nb_max_user()

        :raises RGIOError: if the file being read produces an IOError
        """
        logging.debug("nb_user: " + str(nb_user))
        BemolData.assert_nb_user(nb_user)
        if nb_user is None:
            nb_user = BemolData.nb_max_user()

        # generate the file containing all the dataset
        # !!!!! security failure TOCTTOU
        f_data_name = BemolData.get_file_name(dir, BemolData.nb_max_user())
        if not os.path.exists(f_data_name):
            logging.info("creating file " + str(f_data_name))
            shutil.copy(BemolData.get_file_name(dir, None), f_data_name)

        # other files to generate
        nb_user_to_generate = []
        current_nb_user = BemolData.get_nb_user_to_read(nb_user)
        logging.debug("current_nb_user before while: " + str(current_nb_user))
        # !!!!! security failure TOCTTOU
        while not os.path.exists(BemolData.get_file_name(dir, current_nb_user)):
            logging.debug("current_nb_user in while: " + str(current_nb_user))
            nb_user_to_generate.append(current_nb_user)
            current_nb_user = BemolData.get_nb_user_to_read(current_nb_user+1)
        # each file is built from the next larger one, so start with the largest
        nb_user_to_generate.reverse()

        line_pattern = re.compile(r"(\d+)\s(\d+)\s(\d+)\s(\d+)")

        # generate other files
        for current_nb_user in nb_user_to_generate:
            # read data
            f_existing_data_name = BemolData.get_file_name(dir, current_nb_user+1)
            f_to_create_data_name = BemolData.get_file_name(dir, current_nb_user)
            logging.info("creating file " + f_to_create_data_name)
            dict_user = MyDictionary()
            try:
                # Closing the output here guarantees it is fully written before
                # a later iteration reads it back as its input
                with gzip.open(f_existing_data_name, 'rb') as f_existing_data, \
                        gzip.open(f_to_create_data_name, 'wb') as f_to_create_data:
                    i_max = BemolData.get_nb_line(f_existing_data_name)
                    for i, line in enumerate(f_existing_data):
                        Util.printIteration(i, 1000, i_max)
                        m = line_pattern.match(line)
                        if dict_user.index(int(m.group(1))) < current_nb_user:
                            f_to_create_data.write(line)
            except IOError as error:
                # Compare against the file name; the old comparison with the
                # file object could never be true
                if error.filename == f_existing_data_name:
                    raise RGIOError(error, RGIOError.indent() + 'it disappeared in the meanwhile')
                raise

开发者ID:charanpald，项目名称:wallhack，代码行数:50，代码来源:BemolData.py

示例13: learnModel

# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
    def learnModel(self, graph):
        """
        Learn a prediction model based on all of the edges of the input graph.

        For each ego with at least one neighbour, X stacks the feature rows of
        its neighbours followed by those of an equally sized random sample of
        non-neighbours (fewer if not enough exist), and y is 1 for neighbours
        and -1 otherwise. The alter regressor is trained on (X, y) and its
        weights are collected in W; the ego regressor is then trained to map
        the ego feature rows onto W. The graph is stored as self.graph.

        :param graph: The input graph to learn from.
        :type graph: class:`apgl.graph.AbstractSingleGraph`
        """
        numVertices = graph.getNumVertices()
        Parameter.checkInt(self.windowSize, 1, numVertices)
        self.graph = graph
        logging.info("Learning model on graph of size " + str(numVertices))

        vertexList = graph.getVertexList()
        numFeatures = vertexList.getNumFeatures()
        allIndices = numpy.arange(0, numVertices)
        V = vertexList.getVertices(allIndices)
        W = numpy.zeros((0, numFeatures))
        Xe = numpy.zeros((0, numFeatures))
        printStep = numpy.floor(numVertices/10)

        for ego in range(numVertices):
            Util.printIteration(ego, printStep, numVertices)
            neighbours = graph.neighbours(ego)
            numNeighbours = neighbours.shape[0]

            # Isolated vertices contribute nothing to the regression
            if numNeighbours == 0:
                continue

            # Sample as many non-neighbours as there are neighbours
            nonNeighbours = numpy.setdiff1d(allIndices, neighbours)
            sample = numpy.random.permutation(nonNeighbours.shape[0])[0:numNeighbours]
            X = numpy.r_[V[neighbours, :], V[nonNeighbours[sample], :]]
            y = numpy.ones(X.shape[0])
            y[numNeighbours:] = -1

            w = self.alterRegressor.learnModel(X, y)
            W = numpy.r_[W, numpy.array([w])]
            Xe = numpy.r_[Xe, numpy.array([V[ego, :]])]

        #Now regress the ego features Xe onto the alter weights W
        self.egoRegressor.learnModel(Xe, W)

开发者ID:charanpald，项目名称:sandbox，代码行数:49，代码来源:EgoEdgePredictor.py

示例14: coauthorsGraphFromAuthors

# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
 def coauthorsGraphFromAuthors(self, relevantExperts): 
     """
     Build the coauthor graph restricted to the given relevant authors.

     Every "#@" author line in self.dataFilename adds one edge for each pair
     of its authors that are both in relevantExperts. Parallel edges are
     merged by summing their unit weights; the result is stored as the edge
     attribute "weight" and its reciprocal as "invWeight".

     :param relevantExperts: collection of author names to keep

     :return: (graph, authorIndexer) where authorIndexer is the IdIndexer
         used to assign vertex indices to author names
     """
     authorIndexer = IdIndexer()
     author1Inds = array.array("i")
     author2Inds = array.array("i")

     # Index the relevant authors before any coauthor pair is read
     for relevantExpert in relevantExperts: 
         authorIndexer.append(relevantExpert)

     # The context manager closes the data file once it has been scanned
     with open(self.dataFilename) as dataFile:
         for i, line in enumerate(dataFile):
             Util.printIteration(i, self.stepSize, self.numLines)
             authors = re.findall("#@(.*)", line)

             if len(authors) == 0:
                 continue

             authors = set([x.strip() for x in authors[0].split(",")])
             if len(authors.intersection(relevantExperts)) == 0:
                 continue

             for author1, author2 in itertools.combinations(authors, 2):
                 if author1 in relevantExperts and author2 in relevantExperts:
                     author1Inds.append(authorIndexer.append(author1))
                     author2Inds.append(authorIndexer.append(author2))

     logging.debug("Found " + str(len(authorIndexer.getIdDict())) + " coauthors")

     #Coauthor graph is undirected 
     # numpy.int was only an alias of the builtin int and no longer exists
     author1Inds = numpy.array(author1Inds, int)
     author2Inds = numpy.array(author2Inds, int)
     edges = numpy.c_[author1Inds, author2Inds]

     graph = igraph.Graph()
     graph.add_vertices(len(authorIndexer.getIdDict()))
     graph.add_edges(edges)
     graph.es["weight"] = numpy.ones(graph.ecount())
     graph.simplify(combine_edges=sum)
     graph.es["invWeight"] = 1.0/(numpy.array(graph.es["weight"]))

     return graph, authorIndexer

开发者ID:charanpald，项目名称:wallhack，代码行数:46，代码来源:ArnetMinerDataset.py

示例15: MC2

# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import printIteration [as 别名]
    def MC2(lists, itemList, alpha=None, verbose=False): 
        """
        Perform weighted rank aggregation using MC2 as given in Rank Aggregation Methods 
        for the Web, Dwork et al. The weighting vector is given by alpha. 

        :param lists: A list of lists. Each sublist is an ordered set of a subset of the items from itemList 

        :param itemList: A list of all possible items 

        :param alpha: A vector of weights for the transition matrices; None
            gives every list the weight 1/len(lists)

        :param verbose: If True the per-list transition matrices are returned too

        :return: (outputList, scores), plus PList when verbose is True
        """

        n = len(itemList)
        ell = len(lists)

        # "alpha == None" compares element-wise when alpha is an array, which
        # makes the condition ambiguous; test identity instead
        if alpha is None: 
            alpha = numpy.ones(ell)/ell

        P = scipy.sparse.csr_matrix((n, n))
        PList = [] 

        logging.debug("Computing permutation matrices")
        for j, lst in enumerate(lists): 
            Util.printIteration(j, 1, ell)
            Pj = RankAggregator.generateTransitionMatrix(lst, itemList)

            P = P + alpha[j] * Pj 
            PList.append(Pj)

        P /= ell 
        logging.debug("Done")

        outputList, scores = RankAggregator.computeOutputList(P, itemList)

        if verbose: 
            return outputList, scores, PList
        else: 
            return outputList, scores

开发者ID:charanpald，项目名称:wallhack，代码行数:41，代码来源:RankAggregator.py

注：本文中的sandbox.util.Util.Util.printIteration方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。