当前位置: 首页>>代码示例>>Python>>正文


Python Helper.parseGenderBlogDatasetWithLabels方法代码示例

本文整理汇总了Python中Helper.Helper.parseGenderBlogDatasetWithLabels方法的典型用法代码示例。如果您正苦于以下问题:Python Helper.parseGenderBlogDatasetWithLabels方法的具体用法?Python Helper.parseGenderBlogDatasetWithLabels怎么用?Python Helper.parseGenderBlogDatasetWithLabels使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Helper.Helper的用法示例。


在下文中一共展示了Helper.parseGenderBlogDatasetWithLabels方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: tryOnlineLearn

# 需要导入模块: from Helper import Helper [as 别名]
# 或者: from Helper.Helper import parseGenderBlogDatasetWithLabels [as 别名]
def tryOnlineLearn():
    testShare = 0.25
    valShare = 0.25
    trainingFeatures, trainingLabels = Helper.parseGenderBlogDatasetWithLabels('blog-gender-dataset.csv')  
    #trainingFeatures = random.shuffle(trainingFeatures)  
    if(onlineLearning_HW2.FEATURE_TYPE == 'word2vec'):
        trainingFeatures = onlineLearning_HW2.readDataVecs('genderBlogDatasetVectors.txt')    
    else:
        vocab = onlineLearning_HW2.buildVocab(trainingFeatures)
        trainingFeatures = onlineLearning_HW2.data2bow(trainingFeatures,vocab)
           
                   
    tln = len(trainingFeatures)
    valData = trainingFeatures[0:int(tln*valShare)]
    valLabel = trainingLabels[0:int(tln*valShare)]
    vln = len(valData)
    testData = trainingFeatures[vln:int(vln+tln*testShare)]
    testLabel = trainingLabels[vln:int(vln+tln*testShare)]
    ln = len(valData)+len(testData)
    trainingFeatures = trainingFeatures[ln:]
    trainingLabels = trainingLabels[ln:]        
    
    
    model = onlineLearning_HW2.avgPerceptron(trainingFeatures, trainingLabels)
    
    predictionsTraining = onlineLearning_HW2.classify(trainingFeatures, trainingLabels, model, trainingFeatures, trainingLabels)
    predictionsVal = onlineLearning_HW2.classify(valData, valLabel, model, trainingFeatures, trainingLabels)
    predictionsTest = onlineLearning_HW2.classify(testData, testLabel, model, trainingFeatures, trainingLabels)
    
    print('Training set:')
    res = onlineLearning_HW2.checkPerformance(trainingLabels, predictionsTraining) 
    print(res)   
    print('Validation set:')
    res = onlineLearning_HW2.checkPerformance(valLabel, predictionsVal)
    print(res)    
    print('Test set:')
    res = onlineLearning_HW2.checkPerformance(testLabel, predictionsTest)
    print(res)
开发者ID:mohazahran,项目名称:AuthorID-ML-Project,代码行数:40,代码来源:AuthorId.py

示例2: paramSelectOnlineLearning

# 需要导入模块: from Helper import Helper [as 别名]
# 或者: from Helper.Helper import parseGenderBlogDatasetWithLabels [as 别名]
def paramSelectOnlineLearning():
    
    writer = open('results_log.txt','w')
    # reading the training data, validation data, test data
    print ('>> Parsing all data sets ...')
    testShare = 0.25
    valShare = 0.25    
    trainingData, trainingLabels = Helper.parseGenderBlogDatasetWithLabels('blog-gender-dataset.csv')  
       
    print ('>> Building vocabulary ...')
    #vocab = buildVocab(trainingData, valData)        
         
    bestVal = 0
    bestValParam = ''    
    
    bestTest = 0
    bestTestParam = ''    
    
    bestTrain = 0
    bestTrainParam = ''    
    
    for feat in onlineLearning_HW2.FEATURE_TYPE_LIST:
        global FEATURE_TYPE
        FEATURE_TYPE = feat               
                         
        for boolean in onlineLearning_HW2.BOOLEAN_TYPE_LIST:
            global BOOLEAN_TYPE
            BOOLEAN_TYPE = boolean
             
            if(feat != 'word2vec'):
                vocab = onlineLearning_HW2.buildVocab(trainingData)
                trainingFeatures = onlineLearning_HW2.data2bow(trainingData,vocab) 
                
            if(feat == 'word2vec'):
                trainingFeatures = onlineLearning_HW2.readDataVecs('genderBlogDatasetVectors.txt')    
                
            tln = len(trainingFeatures)
            valFeatures = trainingFeatures[0:int(tln*valShare)]
            valLabel = trainingLabels[0:int(tln*valShare)]
            vln = len(valFeatures)
            testFeatures = trainingFeatures[vln:int(vln+tln*testShare)]
            testLabel = trainingLabels[vln:int(vln+tln*testShare)]
            ln = len(valFeatures)+len(testFeatures)
            trainingFeatures = trainingFeatures[ln:]
            trainingOnlyLabels = trainingLabels[ln:]                
            
            #print ('>> Starting Training ...')
            for typee in onlineLearning_HW2.LEARNING_TYPE_LIST:
                for margin in onlineLearning_HW2.MARGIN_LIST:
                    for maxIter in onlineLearning_HW2.MAX_ITERATION_LIST:
                        for lrate in onlineLearning_HW2.LEARNING_RATE_LIST:
                             
                            #try:
                                #global onlineLearning_HW2.MARGIN
                                onlineLearning_HW2.MARGIN = margin                                                    
                                  
                                #global onlineLearning_HW2.MAX_ITERATION
                                onlineLearning_HW2.MAX_ITERATION = maxIter
                                  
                                #global onlineLearning_HW2.LEARNING_RATE
                                onlineLearning_HW2.LEARNING_RATE = lrate
                                  
                                #global onlineLearning_HW2.LEARNING_TYPE
                                onlineLearning_HW2.LEARNING_TYPE = typee
                                  
                                myStr = 'FEATURE_TYPE='+str(feat) +' BOOLEAN_FEATURES='+str(boolean) +' LEARNING_TYPE='+str(typee)+ ' MARGIN='+str(margin) + ' MAX_ITERATION='+str(maxIter)+' LEARNING_RATE='+str(lrate)
                                writer.write('\n'+myStr)    
                                writer.flush()  
                                print (myStr)
                                #print ('>> Starting training ...')
                                if(onlineLearning_HW2.LEARNING_TYPE == 'p'):
                                    model = onlineLearning_HW2.perceptron(trainingFeatures, trainingOnlyLabels)
                                elif(onlineLearning_HW2.LEARNING_TYPE == 'avgP'):
                                    model = onlineLearning_HW2.avgPerceptron(trainingFeatures, trainingOnlyLabels)
                                elif(onlineLearning_HW2.LEARNING_TYPE == 'w'):
                                    model = onlineLearning_HW2.winnow(trainingFeatures, trainingOnlyLabels)
                                else:
                                    model = onlineLearning_HW2.kernelPerceptron(trainingFeatures, trainingOnlyLabels)
                                  
                                  
                                #print ('>> Making predictions ...')
                                predictionsTraining = onlineLearning_HW2.classify(trainingFeatures, trainingOnlyLabels, model, trainingFeatures, trainingOnlyLabels)    
                                predictionsVal = onlineLearning_HW2.classify(valFeatures, valLabel, model, trainingFeatures, trainingOnlyLabels)
                                predictionsTest = onlineLearning_HW2.classify(testFeatures, testLabel, model, trainingFeatures, trainingOnlyLabels)
                                  
                                #print ('>> Calculating performance ...')
                                #print('Training set:')
                                res, trainAcc = onlineLearning_HW2.checkPerformance(trainingOnlyLabels, predictionsTraining)
                                writer.write('\nTRAIN: '+res)
                                #print('Validation set:')
                                res, valAcc = onlineLearning_HW2.checkPerformance(valLabel, predictionsVal)
                                writer.write('\nVAL  : '+res)
                                #print('Test set:')
                                res, testAcc = onlineLearning_HW2.checkPerformance(testLabel, predictionsTest)
                                writer.write('\nTEST : '+res)
                                  
                                writer.flush()
                                                                
                           # except:
                            #    writer.write('>> Expection !')
#.........这里部分代码省略.........
开发者ID:mohazahran,项目名称:AuthorID-ML-Project,代码行数:103,代码来源:AuthorId.py


注:本文中的Helper.Helper.parseGenderBlogDatasetWithLabels方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。