當前位置: 首頁>>代碼示例>>Python>>正文


Python Helper.parseGenderBlogDatasetWithLabels方法代碼示例

本文整理匯總了Python中Helper.Helper.parseGenderBlogDatasetWithLabels方法的典型用法代碼示例。如果您正苦於以下問題:Python Helper.parseGenderBlogDatasetWithLabels方法的具體用法?Python Helper.parseGenderBlogDatasetWithLabels怎麽用?Python Helper.parseGenderBlogDatasetWithLabels使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在Helper.Helper的用法示例。


在下文中一共展示了Helper.parseGenderBlogDatasetWithLabels方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: tryOnlineLearn

# 需要導入模塊: from Helper import Helper [as 別名]
# 或者: from Helper.Helper import parseGenderBlogDatasetWithLabels [as 別名]
def tryOnlineLearn():
    """Train one model with avgPerceptron and print its performance per split.

    The samples come from 'blog-gender-dataset.csv'.  When
    onlineLearning_HW2.FEATURE_TYPE is 'word2vec' the features are loaded
    with readDataVecs from 'genderBlogDatasetVectors.txt'; otherwise they
    are produced by buildVocab followed by data2bow.

    The data is cut positionally, without shuffling: the leading quarter is
    the validation split, the following quarter the test split, and
    everything after that is used for training.
    """
    valShare = 0.25
    testShare = 0.25

    rawSamples, allLabels = Helper.parseGenderBlogDatasetWithLabels('blog-gender-dataset.csv')
    if onlineLearning_HW2.FEATURE_TYPE == 'word2vec':
        allFeatures = onlineLearning_HW2.readDataVecs('genderBlogDatasetVectors.txt')
    else:
        vocabulary = onlineLearning_HW2.buildVocab(rawSamples)
        allFeatures = onlineLearning_HW2.data2bow(rawSamples, vocabulary)

    # Validation split: the first valShare of the samples.
    sampleCount = len(allFeatures)
    valEnd = int(sampleCount * valShare)
    valData = allFeatures[0:valEnd]
    valLabel = allLabels[0:valEnd]

    # Test split: starts right after the validation split.
    testStart = len(valData)
    testEnd = int(testStart + sampleCount * testShare)
    testData = allFeatures[testStart:testEnd]
    testLabel = allLabels[testStart:testEnd]

    # Training split: whatever is left after validation and test.
    trainStart = len(valData) + len(testData)
    trainData = allFeatures[trainStart:]
    trainLabel = allLabels[trainStart:]

    model = onlineLearning_HW2.avgPerceptron(trainData, trainLabel)

    splits = (
        ('Training set:', trainData, trainLabel),
        ('Validation set:', valData, valLabel),
        ('Test set:', testData, testLabel),
    )

    # Classify every split first, then report, in the same order as listed.
    predictions = [
        onlineLearning_HW2.classify(data, labels, model, trainData, trainLabel)
        for _, data, labels in splits
    ]

    for (title, _, labels), predicted in zip(splits, predictions):
        print(title)
        res = onlineLearning_HW2.checkPerformance(labels, predicted)
        print(res)
開發者ID:mohazahran,項目名稱:AuthorID-ML-Project,代碼行數:40,代碼來源:AuthorId.py

示例2: paramSelectOnlineLearning

# 需要導入模塊: from Helper import Helper [as 別名]
# 或者: from Helper.Helper import parseGenderBlogDatasetWithLabels [as 別名]
def paramSelectOnlineLearning():
    """Sweep feature/learner settings and log per-split performance.

    For every combination of the FEATURE_TYPE_LIST, BOOLEAN_TYPE_LIST,
    LEARNING_TYPE_LIST, MARGIN_LIST, MAX_ITERATION_LIST and
    LEARNING_RATE_LIST values exposed by onlineLearning_HW2, a model is
    trained on the training split and its performance on the training,
    validation and test splits is appended to 'results_log.txt'.

    The data from 'blog-gender-dataset.csv' is split positionally: the
    first 25% is validation, the next 25% is test, the rest is training.

    NOTE(review): this listing is truncated after the innermost loop body,
    so the use of the best* accumulators, the return value and the closing
    of the log file are not visible here.
    """
    
    # NOTE(review): opened without a `with` block and not closed anywhere in
    # the visible part of the function -- confirm it is closed further down.
    writer = open('results_log.txt','w')
    # reading the training data, validation data, test data
    print ('>> Parsing all data sets ...')
    testShare = 0.25
    valShare = 0.25    
    trainingData, trainingLabels = Helper.parseGenderBlogDatasetWithLabels('blog-gender-dataset.csv')  
       
    print ('>> Building vocabulary ...')
    #vocab = buildVocab(trainingData, valData)        
         
    # Best-score accumulators; they are only initialised in the visible part
    # of the function (presumably updated in the omitted tail).
    bestVal = 0
    bestValParam = ''    
    
    bestTest = 0
    bestTestParam = ''    
    
    bestTrain = 0
    bestTrainParam = ''    
    
    for feat in onlineLearning_HW2.FEATURE_TYPE_LIST:
        # NOTE(review): `global` binds FEATURE_TYPE in *this* module's
        # namespace; it does not assign onlineLearning_HW2.FEATURE_TYPE.
        # Verify whether the onlineLearning_HW2 helpers are meant to see it.
        global FEATURE_TYPE
        FEATURE_TYPE = feat               
                         
        for boolean in onlineLearning_HW2.BOOLEAN_TYPE_LIST:
            # NOTE(review): same as above -- this sets a global of this
            # module, not onlineLearning_HW2.BOOLEAN_TYPE.
            global BOOLEAN_TYPE
            BOOLEAN_TYPE = boolean
             
            # Features are rebuilt from scratch for every (feat, boolean)
            # pair, so re-slicing trainingFeatures below is safe.
            if(feat != 'word2vec'):
                vocab = onlineLearning_HW2.buildVocab(trainingData)
                trainingFeatures = onlineLearning_HW2.data2bow(trainingData,vocab) 
                
            if(feat == 'word2vec'):
                trainingFeatures = onlineLearning_HW2.readDataVecs('genderBlogDatasetVectors.txt')    
                
            # Positional split: [0, val) validation, [val, val+test) test,
            # remainder training.
            tln = len(trainingFeatures)
            valFeatures = trainingFeatures[0:int(tln*valShare)]
            valLabel = trainingLabels[0:int(tln*valShare)]
            vln = len(valFeatures)
            testFeatures = trainingFeatures[vln:int(vln+tln*testShare)]
            testLabel = trainingLabels[vln:int(vln+tln*testShare)]
            ln = len(valFeatures)+len(testFeatures)
            trainingFeatures = trainingFeatures[ln:]
            # Kept under a separate name so trainingLabels stays complete and
            # can be sliced again on the next outer-loop iteration.
            trainingOnlyLabels = trainingLabels[ln:]                
            
            #print ('>> Starting Training ...')
            for typee in onlineLearning_HW2.LEARNING_TYPE_LIST:
                for margin in onlineLearning_HW2.MARGIN_LIST:
                    for maxIter in onlineLearning_HW2.MAX_ITERATION_LIST:
                        for lrate in onlineLearning_HW2.LEARNING_RATE_LIST:
                             
                            # The extra indentation of this body is left over
                            # from the commented-out try/except wrapper.
                            #try:
                                # Settings are passed by assigning attributes
                                # on the onlineLearning_HW2 module.
                                #global onlineLearning_HW2.MARGIN
                                onlineLearning_HW2.MARGIN = margin                                                    
                                  
                                #global onlineLearning_HW2.MAX_ITERATION
                                onlineLearning_HW2.MAX_ITERATION = maxIter
                                  
                                #global onlineLearning_HW2.LEARNING_RATE
                                onlineLearning_HW2.LEARNING_RATE = lrate
                                  
                                #global onlineLearning_HW2.LEARNING_TYPE
                                onlineLearning_HW2.LEARNING_TYPE = typee
                                  
                                # One header line per configuration, written
                                # to the log and echoed to stdout.
                                myStr = 'FEATURE_TYPE='+str(feat) +' BOOLEAN_FEATURES='+str(boolean) +' LEARNING_TYPE='+str(typee)+ ' MARGIN='+str(margin) + ' MAX_ITERATION='+str(maxIter)+' LEARNING_RATE='+str(lrate)
                                writer.write('\n'+myStr)    
                                writer.flush()  
                                print (myStr)
                                #print ('>> Starting training ...')
                                # Pick the trainer by learning-type code; any
                                # code other than 'p', 'avgP' or 'w' falls
                                # through to kernelPerceptron.
                                if(onlineLearning_HW2.LEARNING_TYPE == 'p'):
                                    model = onlineLearning_HW2.perceptron(trainingFeatures, trainingOnlyLabels)
                                elif(onlineLearning_HW2.LEARNING_TYPE == 'avgP'):
                                    model = onlineLearning_HW2.avgPerceptron(trainingFeatures, trainingOnlyLabels)
                                elif(onlineLearning_HW2.LEARNING_TYPE == 'w'):
                                    model = onlineLearning_HW2.winnow(trainingFeatures, trainingOnlyLabels)
                                else:
                                    model = onlineLearning_HW2.kernelPerceptron(trainingFeatures, trainingOnlyLabels)
                                  
                                  
                                #print ('>> Making predictions ...')
                                # classify always receives the training split
                                # as its last two arguments, whichever split
                                # is being predicted.
                                predictionsTraining = onlineLearning_HW2.classify(trainingFeatures, trainingOnlyLabels, model, trainingFeatures, trainingOnlyLabels)    
                                predictionsVal = onlineLearning_HW2.classify(valFeatures, valLabel, model, trainingFeatures, trainingOnlyLabels)
                                predictionsTest = onlineLearning_HW2.classify(testFeatures, testLabel, model, trainingFeatures, trainingOnlyLabels)
                                  
                                #print ('>> Calculating performance ...')
                                # checkPerformance is unpacked here as a
                                # (report string, accuracy) pair; only the
                                # string is written to the log.
                                #print('Training set:')
                                res, trainAcc = onlineLearning_HW2.checkPerformance(trainingOnlyLabels, predictionsTraining)
                                writer.write('\nTRAIN: '+res)
                                #print('Validation set:')
                                res, valAcc = onlineLearning_HW2.checkPerformance(valLabel, predictionsVal)
                                writer.write('\nVAL  : '+res)
                                #print('Test set:')
                                res, testAcc = onlineLearning_HW2.checkPerformance(testLabel, predictionsTest)
                                writer.write('\nTEST : '+res)
                                  
                                # Flush after each configuration so partial
                                # results are on disk during the sweep.
                                writer.flush()
                                                                
                           # except:
                            #    writer.write('>> Expection !')
#.........這裏部分代碼省略.........
開發者ID:mohazahran,項目名稱:AuthorID-ML-Project,代碼行數:103,代碼來源:AuthorId.py


注:本文中的Helper.Helper.parseGenderBlogDatasetWithLabels方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。