当前位置: 首页>>代码示例>>Python>>正文


Python IOtools.tocsv方法代码示例

本文整理汇总了Python中sentimentfinding.IOtools.tocsv方法的典型用法代码示例。如果您正苦于以下问题:Python IOtools.tocsv方法的具体用法?Python IOtools.tocsv怎么用?Python IOtools.tocsv使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sentimentfinding.IOtools的用法示例。


在下文中一共展示了IOtools.tocsv方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_fold_averages_ablation

# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def get_fold_averages_ablation():
    """Merge per-fold cross-validation score files of the ablation experiments.

    For every ablation type and every feature-exclusion folder, walks the tree
    <root>/<ablationtype>/scores/<excname>/<annottype>/<featset>/<combname>/<labelunion>/<fold>/,
    concatenates all fold-level score csv files into one dataframe, writes it
    as ``bigdf.csv`` under the exclusion folder and hands that folder to
    ``get_fold_averages`` for the averaging step.

    NOTE(review): the scores root is hard-coded to the author's machine.
    """
    ablationCVscoresroot = "/home/dicle/Dicle/Tez/corpusstats/learning11/ablation2/"
    ablationtypes = ["item", "group", "onedim"]
    
    annotationtypes = ["double"]
    featsets = ["redef-rat_lex-rat"]
    '''labelunions = ["EACHobj-EACHsubj","ALLobj-ALLsubj","ALLobj-STGsubj", 
               "STGobj-ALLsubj", "STGobj-STGsubj", "WKobj-WKsubj"]
    '''
    
    
    for ablationtype in ablationtypes:
        
        print ablationtype
        
        p1 = os.path.join(ablationCVscoresroot, ablationtype, "scores")
        
        exclusionnames = IOtools.getfoldernames_of_dir(p1)
        
        for excname in exclusionnames:
            
            # one merged dataframe is accumulated per exclusion folder
            bigdf = pd.DataFrame(columns=metaexperimentation.performanceheader)
            
            p2 = os.path.join(p1, excname)
            
            for annottype in annotationtypes:
                p3 = os.path.join(p2, annottype)
                
                for featset in featsets:
                    p4 = os.path.join(p3, featset)
                    combname = IOtools.getfoldernames_of_dir(p4)[0] # we know that there is only one folder
                    
                    p5 = os.path.join(p4, combname)
                    labelunions = IOtools.getfoldernames_of_dir(p5)
                    
                    for labelunion in labelunions: 
                        p6 = os.path.join(p5, labelunion)
                        
                        folds = IOtools.getfoldernames_of_dir(p6)
                        
                        for foldno in folds:
                            p7 = os.path.join(p6, foldno)
                                                        
                            scorecsvfilepath = p7 + os.sep + metaexperimentation.scorefilename+".csv"
                            scorecsvfile = IOtools.readcsv(scorecsvfilepath)
                            
                            print " scorefile ",scorecsvfilepath,"  ",scorecsvfile.shape
                            
                            #rankdf = matrixhelpers.get_first_N_rows(scorecsvfile, int(N / 2), metricnames, ascend=takeworst)
                            # tag every score row with its experiment coordinates
                            rankdf = scorecsvfile.copy()
                            rankdf["labelunion"] = labelunion
                            rankdf["featureset"] = featset + " ** " + combname
                            rankdf["annottype"] = annottype
                            rankdf["fold"] = foldno
                            #dflist.append(rankdf)
                            bigdf = bigdf.append(rankdf)
    
            print bigdf.shape,"  ",p2
            IOtools.tocsv(bigdf, os.path.join(p2, "bigdf.csv"))
            get_fold_averages(p2)
开发者ID:dicleoztur,项目名称:tez0.1v,代码行数:62,代码来源:performance_evaluation_crossval.py

示例2: exclude_one_feature

# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
   def exclude_one_feature(self):
       """Build ablation datasets by leaving one feature out per combination.

       For every exclusion in ``utils.get_excluded_features_map()`` and every
       feature-combination code under it, concatenates the csv matrices of the
       features that are kept (featno >= 0) column-wise into one dataset and
       writes it to <combinedfeaturesfolder>/<exclusionname>/<featuregroup>/<combcode>.csv.
       """
       exclusionmap = utils.get_excluded_features_map()
       
       for exclusionname, featuremap in exclusionmap.iteritems():
           p1 = IOtools.ensure_dir(os.path.join(self.combinedfeaturesfolder, exclusionname))
           
           for featuregroup, combcodemap in featuremap.iteritems():
               p2 = IOtools.ensure_dir(os.path.join(p1, featuregroup))
               
               for combcode, row in combcodemap.iteritems():
                   featuredflist = []
                   
                   for j,featno in enumerate(row):
                       print combcode[:8],"  ",row, " featno= ",featno
                       # presumably a negative featno marks the excluded feature — skipped here
                       if featno >= 0:
                           # the j-th group name in alphabetical order matches the combcode layout
                           groupname = sorted(self.featuremap.keys())[j]
                           print " -> ",groupname           
                           extractorinstance = self.featuremap[groupname][featno]
                           featurematrixpath = extractorinstance.getfeaturematrixpath
                           featurematrix = IOtools.readcsv(featurematrixpath, keepindex=True)
                           featuredflist.append(featurematrix)
                    
                   datamatrix = pd.concat(featuredflist, axis=1) #, verify_integrity=True) # CLOSED DUE TO THE OVERLAPPING WORDS IN ABS AND SUBJ LISTS
                   
                   datamatrixpath = os.path.join(p2, combcode+".csv")  
                   IOtools.tocsv(datamatrix, datamatrixpath, keepindex=True)          
开发者ID:dicleoztur,项目名称:subjectivity_detection,代码行数:29,代码来源:features_combiner.py

示例3: best_score_per_annottype

# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
 def best_score_per_annottype(self, metricname, scorepath=metaexperimentation.expscorepath):
     """Collect the best-ranking score rows per annotation type into one csv.

     Walks <scorepath>/<annottype>/<metricclass>/<combname>/<labelunion>/<fold>/,
     reads each fold's score file, drops clustering rows, keeps the top
     self.N/2 rows by `metricname` (worst first when self.takeworst), tags
     them with their experiment coordinates, accumulates them and writes the
     sorted result under self.resultspath.
     """
     bigdf = pd.DataFrame(columns=metaexperimentation.performanceheader)
     
     #scorepath = os.path.join(self.experimentspath, "scores")
     annottypes = IOtools.getfoldernames_of_dir(scorepath)
     
     for annottype in annottypes:
         
         # per-annotation-type accumulator, re-trimmed to self.N rows below
         annotdf = pd.DataFrame(columns=metaexperimentation.performanceheader)
         
         p1 = os.path.join(scorepath, annottype)
         #featcombnames = IOtools.getfoldernames_of_dir(p1)  # list of combcode_NC names            
         metricclasses = IOtools.getfoldernames_of_dir(p1)
         
         for metricclass in metricclasses:
             
             p2 = os.path.join(p1, metricclass)
             featcombnames = IOtools.getfoldernames_of_dir(p2)
                
             for combname in featcombnames:
                 
                 p3 = os.path.join(p2, combname)
                 labelunions = IOtools.getfoldernames_of_dir(p3)
                 
                 
                 for labelunion in labelunions:
                 
                     p4 = os.path.join(p3, labelunion)  
                     folds = IOtools.getfoldernames_of_dir(p4)
                     
                     for fold in folds:
                         
                         p5 = os.path.join(p4, fold)                       
                             
                         scorecsvfilepath = p5 + os.sep + metaexperimentation.scorefilename+".csv"
                         scorecsvfile = IOtools.readcsv(scorecsvfilepath)
                         # drop clustering results as they are useless being not worked on (back validation missing)
                         scorecsvfile = scorecsvfile[np.logical_not(scorecsvfile.algorithm.str.startswith("_MT-Clustering"))]
                         
                         rankdf = matrixhelpers.get_first_N_rows(scorecsvfile, int(self.N / 2), [metricname], ascend=self.takeworst)
                         print rankdf.shape
                         #annotdf.loc[:, rankdf.columns.values.tolist()] = rankdf.values.copy()
                         print " ** ",annotdf.shape
                         rankdf["labelunion"] = labelunion
                         rankdf["featureset"] = metricclass + " ** " + combname
                         rankdf["annottype"] = annottype
                         #dflist.append(rankdf)
                         annotdf = annotdf.append(rankdf)
                         print scorecsvfile.shape
             
                         # NOTE(review): trimming annotdf and appending it to bigdf
                         # inside the fold loop re-appends overlapping rows on every
                         # iteration — confirm this repetition is intended.
                         annotdf = matrixhelpers.get_first_N_rows(annotdf, self.N, [metricname], ascend=self.takeworst)  
             
                         bigdf = bigdf.append(annotdf)
         # insert annottype as colname to bigdf. cutbigdf from the first 10.
     
     bigdf.sort(["annottype", metricname], ascending=self.takeworst, inplace=True)
     #resultantdf = matrixhelpers.get_first_N_rows(bigdf, self.N)
     evaluationname = self.prefix+"_score_per_annottype-"+metricname.upper()
     IOtools.tocsv(bigdf, os.path.join(self.resultspath, evaluationname+".csv"))
开发者ID:dicleoztur,项目名称:tez0.1v,代码行数:62,代码来源:performance_evaluation_crossval.py

示例4: assign_annotator_aggreement

# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def assign_annotator_aggreement(doubleannotated_path, doubleannot_filename):
    """Derive agreement-filtered gold sets from a doubly annotated csv.

    Reads the csv, keeps only its questionname/answer columns and produces two
    outputs in the same folder: ``doubleannotated_fullagr4class.csv`` holding
    rows where both annotators gave the identical answer, and
    ``doubleannotated_halfagr2class.csv`` holding rows where both answers fall
    in the same half ({1,2} -> 12, {3,4} -> 34). Rows without agreement keep
    their original (non-positive) answer and are filtered out before writing.
    """
    sourcepath = os.path.join(doubleannotated_path, doubleannot_filename)
    annotations = IOtools.readcsv(sourcepath)
    numrows = annotations.shape[0]

    # both frames start from the same two columns; agreement values overwrite "answer"
    fullagreement = annotations.loc[:, ["questionname", "answer"]].copy()
    halfagreement = annotations.loc[:, ["questionname", "answer"]].copy()

    for rowno in range(numrows):
        first = annotations.loc[rowno, "answer1"]
        second = annotations.loc[rowno, "answer2"]

        if first == second:
            fullagreement.loc[rowno, "answer"] = first
        # note: an exact match in {1,2} also counts as half agreement (12)
        if first in [1, 2] and second in [1, 2]:
            halfagreement.loc[rowno, "answer"] = 12
        elif first in [3, 4] and second in [3, 4]:
            halfagreement.loc[rowno, "answer"] = 34

    # keep only rows where an agreement value was assigned
    fullagreement = fullagreement[fullagreement["answer"] > 0]
    IOtools.tocsv(fullagreement, os.path.join(doubleannotated_path, "doubleannotated_fullagr4class.csv"))

    halfagreement = halfagreement[halfagreement["answer"] > 0]
    IOtools.tocsv(halfagreement, os.path.join(doubleannotated_path, "doubleannotated_halfagr2class.csv"))
开发者ID:dicleoztur,项目名称:subjectivity_detection,代码行数:31,代码来源:goldsetbuilder.py

示例5: get_allfolds_bigdf

# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def get_allfolds_bigdf(foldrootpath, annottype, featset, labelunion):
    """Concatenate the score csv files of all folds under `foldrootpath` into
    one dataframe, tag every row with (annottype, featset, labelunion, fold)
    and write the result to <foldrootpath>/bigdf.csv (index dropped).
    """
    bigdf = pd.DataFrame(columns=metaexperimentation.performanceheader)
    
    folds = IOtools.getfoldernames_of_dir(foldrootpath)
                        
    for foldno in folds:
        p1 = os.path.join(foldrootpath, foldno)
                                    
        scorecsvfilepath = p1 + os.sep + metaexperimentation.scorefilename+".csv"
        scorecsvfile = IOtools.readcsv(scorecsvfilepath)
        
        print " scorefile ",scorecsvfilepath,"  ",scorecsvfile.shape
        
        #rankdf = matrixhelpers.get_first_N_rows(scorecsvfile, int(N / 2), metricnames, ascend=takeworst)
        # tag every score row with its experiment coordinates
        rankdf = scorecsvfile.copy()
        rankdf["labelunion"] = labelunion
        rankdf["featureset"] = featset 
        rankdf["annottype"] = annottype
        rankdf["fold"] = foldno
        bigdf = bigdf.append(rankdf)
        #dflist.append(rankdf)
    
    
    print "FOLDROOTPATH ",foldrootpath
    outcsvpath = os.path.join(foldrootpath, "bigdf.csv")
    IOtools.tocsv(bigdf, outcsvpath, False)
开发者ID:dicleoztur,项目名称:tez0.1v,代码行数:29,代码来源:performance_evaluation_crossval.py

示例6: get_randomly_annotated_set

# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def get_randomly_annotated_set(incsvfilename, outcsvfilename,
                               incsvfolder=metacorpus.userannotatedpath, outcsvfolder=metacorpus.randomannotatedpath, 
                               randomchoicevalues=metacorpus.subjectivity_label_values.keys()):
    """Produce a randomly labelled baseline copy of an annotation csv.

    Reads <incsvfolder>/<incsvfilename> (cols: questionname,userid,answer),
    replaces every answer with a uniform draw from `randomchoicevalues`,
    then marks 5 distinct random rows with answer 5 ('no idea, ambiguous')
    and writes the result to <outcsvfolder>/<outcsvfilename>.
    """
    sourcedf = IOtools.readcsv(os.path.join(incsvfolder, incsvfilename))  # df cols: questionname,userid,answer
    randomizeddf = sourcedf.copy()
    rowcount = randomizeddf.values.shape[0]

    # one uniform draw per row, in row order
    drawnanswers = []
    for _ in range(rowcount):
        drawnanswers.append(random.choice(randomchoicevalues))
    randomizeddf.loc[:, "answer"] = drawnanswers

    # 5 distinct rows get the 'ambiguous' label; the count 5 is a
    # deliberate choice subject to change for statistical validity
    ambiguousrows = random.sample(range(rowcount), 5)
    randomizeddf.loc[ambiguousrows, "answer"] = 5

    IOtools.tocsv(randomizeddf, os.path.join(outcsvfolder, outcsvfilename))
开发者ID:dicleoztur,项目名称:subjectivity_detection,代码行数:27,代码来源:goldsetbuilder.py

示例7: get_AllObj_AllSubj_class

# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def get_AllObj_AllSubj_class(originallabelspath, outfolder, in_NC=5):
    """Collapse an in_NC-class label file into the 2-class ALLobj-ALLsubj union.

    Labels 1 and 2 are merged into 12, labels 3 and 4 into 34; label 5
    (undecided) is resolved by a uniform coin flip between 12 and 34. The
    collapsed frame is written under the ALLobj-ALLsubj union folder of
    `outfolder`. Does nothing when in_NC is already <= 2.
    """
    out_NC = 2
    if in_NC <= out_NC:
        return

    sourcedf = IOtools.readcsv(originallabelspath, keepindex=True)

    targetpath = os.path.join(ensure_unionclass_dir(outfolder, "ALLobj-ALLsubj", out_NC), metacorpus.labelsfilename + ".csv")

    # object dtype so merged codes (12/34) coexist with the originals
    labels = np.array(sourcedf.values, dtype=object)

    # pairwise merge: 1,2 -> 12 and 3,4 -> 34
    for oldlabel, newlabel in [(1, 12), (2, 12), (3, 34), (4, 34)]:
        labels[labels == oldlabel] = newlabel

    # undecided rows are assigned to one of the two unions at random
    for pos in range(len(labels)):
        if labels[pos] == 5:
            labels[pos] = random.choice([12, 34])

    collapseddf = pd.DataFrame(labels, columns=sourcedf.columns.values.tolist(), index=sourcedf.index.values.tolist())
    IOtools.tocsv(collapseddf, targetpath, keepindex=True)
开发者ID:dicleoztur,项目名称:subjectivity_detection,代码行数:27,代码来源:arrange_class_unions.py

示例8: combine_features

# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
 def combine_features(self, combmatrix):
     """Materialize every feature combination encoded in `combmatrix`.

     Each row of `combmatrix` selects one extractor per feature group (by
     index into self.featuremap, groups taken in alphabetical order); the
     selected feature csv matrices are concatenated column-wise and written
     to self.combinedfeaturesfolder as combNO_F_GROUPNO-FEATNO.csv. A
     human-readable decoding of each combination name is saved as txt.
     """
     ncombs, nrows = combmatrix.shape
     
     for i,row in enumerate(combmatrix):
         filename = "comb"+str(i)+"_F"
         featuredflist = []
         for j,featno in enumerate(row):
             # j-th group in alphabetical order; extend the encoded filename
             groupname = sorted(self.featuremap.keys())[j]
             filename += "_"+str(j)+"-"+str(featno)   # filename = combNO_F_GROUPNO-FEATNO
                             
             extractorinstance = self.featuremap[groupname][featno]
             featurematrixpath = extractorinstance.getfeaturematrixpath
             featurematrix = IOtools.readcsv(featurematrixpath, keepindex=True)
             featuredflist.append(featurematrix)
         
         print filename
         print utils.decode_combcode(filename, self.featuremap)
         datamatrix = pd.concat(featuredflist, axis=1) #, verify_integrity=True) # CLOSED DUE TO THE OVERLAPPING WORDS IN ABS AND SUBJ LISTS
         #datamatrix['index'] = datamatrix.index
         #datamatrix = datamatrix.drop_duplicates(cols='index')
         #del datamatrix['index']
         
         # replace nan and inf cells !! no. work on matrix, not df. better do this change on learning
         #datamatrix[np.isnan(datamatrix)] = 0
         #datamatrix[np.isinf(datamatrix)] = 0
         
         datamatrixpath = self.combinedfeaturesfolder + os.sep + filename + ".csv"
         IOtools.tocsv(datamatrix, datamatrixpath, keepindex=True)
         
         # record comb name decoding
         decodednamesfolder = IOtools.ensure_dir(os.path.join(self.datasetrootpath, metacorpus.decodedcombnamesfoldername))
         decodedname = utils.tostr_decoded_combcode(filename, self.featuremap)
         IOtools.todisc_txt(decodedname, os.path.join(decodednamesfolder, filename+".txt"))
开发者ID:dicleoztur,项目名称:subjectivity_detection,代码行数:35,代码来源:features_combiner.py

示例9: get_2_classes

# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def get_2_classes(labelrootpath, taggertype, in_NC=5):
    """Collapse an in_NC-class tagger label file into 2 classes.

    Reads <labelrootpath>/NC<sep><in_NC>/<taggertype>.csv, merges labels
    1,2 -> 12 and 3,4 -> 34, resolves label 5 (undecided) by a uniform coin
    flip between 12 and 34, and writes the collapsed frame into the NC-2
    folder. Does nothing when in_NC is already <= 2.
    """
    out_NC = 2
    if in_NC <= out_NC:
        return

    sourcepath = os.path.join(labelrootpath, "NC"+metaexperimentation.intrafeatsep+str(in_NC), taggertype+".csv")
    sourcedf = IOtools.readcsv(sourcepath, keepindex=True)

    targetpath = os.path.join(ensure_nclass_dir(labelrootpath, out_NC), taggertype+".csv")

    # object dtype so merged codes (12/34) coexist with the originals
    labels = np.array(sourcedf.values, dtype=object)

    # pairwise merge: 1,2 -> 12 and 3,4 -> 34
    for old, new in ((1, 12), (2, 12), (3, 34), (4, 34)):
        labels[labels == old] = new

    # undecided rows are assigned to one of the two classes at random
    for pos in range(len(labels)):
        if labels[pos] == 5:
            labels[pos] = random.choice([12, 34])

    collapseddf = pd.DataFrame(labels, columns=sourcedf.columns.values.tolist(), index=sourcedf.index.values.tolist())
    IOtools.tocsv(collapseddf, targetpath, keepindex=True)
开发者ID:dicleoztur,项目名称:subjectivity_detection,代码行数:28,代码来源:arrange_N_classes.py

示例10: calculate_features

# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
 def calculate_features(self):
     """Extract the "ADJ" count column from the POS-tag matrix at
     self.inputpath and persist it as a one-column csv (named self.fname)
     at self.recordpath, keeping the original row index."""
     postagmatrix = IOtools.readcsv(self.inputpath, keepindex=True)
     adjectivecounts = postagmatrix.loc[:, "ADJ"].values
     featuredf = pd.DataFrame(adjectivecounts,
                              index=postagmatrix.index.values.tolist(),
                              columns=[self.fname])
     IOtools.tocsv(featuredf, self.recordpath, keepindex=True)
开发者ID:dicleoztur,项目名称:subjectivity_detection,代码行数:10,代码来源:features_combiner.py

示例11: find_annotator_disagreements

# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def find_annotator_disagreements(df, outpath):
    """Count per annotator how often their answer deviates from the majority
    vote and from the gold label, and write per-annotator statistics
    (absolute counts and counts weighted by number of annotations) to `outpath`.

    Rows whose sentenceid ends with comment_suffix are skipped. Annotator
    cells in the MA<j>_workerid columns are "workerid<ANSWERCELL_SEP>answer"
    pairs; columns MA0-MA2 are compared against the cat1 majority answer and
    MA3-MA5 against the cat2 majority answer.
    """
    annotators = get_annotator_names(df)
    
    majority_disagr = initialize_map(annotators)
    gold_disagr = initialize_map(annotators)
    numofannotations = initialize_map(annotators)
    
    indices = df.index.tolist()
    for i in indices:
        gold_ans = df.loc[i, "GOLD"]
        sentence = df.loc[i, "sentenceid"]
        if not sentence.endswith(comment_suffix):
            for j1 in range(0, 3):   # compare cat1 answers
                major_ans = df.loc[i, mvote_colname+"cat1"]
                workeranswerpair = df.loc[i, "MA"+str(j1)+"_workerid"]
                print workeranswerpair
                # cell format: "<workerid><ANSWERCELL_SEP><answer>"
                items = workeranswerpair.split(ANSWERCELL_SEP)
                worker = items[0].strip()
                answer = items[1].strip()
                
                print answer,"  ",major_ans,"  ",gold_ans
                if answer != major_ans:
                    majority_disagr[worker] = majority_disagr[worker] + 1
                if answer != gold_ans:
                    gold_disagr[worker] = gold_disagr[worker] + 1
                numofannotations[worker] = numofannotations[worker] + 1
            
            for j1 in range(3, 6):   # compare cat2 answers
                major_ans = df.loc[i, mvote_colname+"cat2"]
                workeranswerpair = df.loc[i, "MA"+str(j1)+"_workerid"]
                items = workeranswerpair.split(ANSWERCELL_SEP)
                worker = items[0].strip()
                answer = items[1].strip()
                
                print answer,"  ",major_ans,"  ",gold_ans
                if answer != major_ans:
                    majority_disagr[worker] = majority_disagr[worker] + 1
                if answer != gold_ans:
                    gold_disagr[worker] = gold_disagr[worker] + 1
                numofannotations[worker] = numofannotations[worker] + 1
    
    # assemble one row of statistics per annotator
    cols = ["annotatorid", "nMajorityDisagr", "nGoldDisagr", "nAnnotations", "weightedMajorityDisagr", "weightedGoldDisagr"]
    matrix = np.zeros([len(annotators), len(cols)], dtype=object)
    adf = pd.DataFrame(matrix, index=range(len(annotators)), columns=cols)
    for i,worker in enumerate(annotators):
        adf.loc[i, "annotatorid"] = worker
        adf.loc[i, "nMajorityDisagr"] = majority_disagr[worker]
        adf.loc[i, "nGoldDisagr"] = gold_disagr[worker]
        adf.loc[i, "nAnnotations"] = numofannotations[worker]
        if numofannotations[worker] == 0:
            # -1 marks 'no annotations' and avoids division by zero
            adf.loc[i, "weightedMajorityDisagr"] = -1
            adf.loc[i, "weightedGoldDisagr"] = -1
        else:
            adf.loc[i, "weightedMajorityDisagr"] = round(majority_disagr[worker] / float(numofannotations[worker]), 4)
            adf.loc[i, "weightedGoldDisagr"] = round(gold_disagr[worker] / float(numofannotations[worker]), 4)
    
    IOtools.tocsv(adf, outpath)
开发者ID:dicleoztur,项目名称:subjectivity_detection,代码行数:59,代码来源:analyse_mturk.py

示例12: content_adjectivecount

# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
 def content_adjectivecount(self):
     """Extract the "ADJ" count column of content-postagCOUNT.csv as the
     content-adjectivecount feature and write it, index preserved, as a
     one-column csv into self.outmatrixfolder."""
     sourcepath = os.path.join(self.inmatrixfolder, "content-postagCOUNT.csv")
     fname = "content-adjectivecount"
     targetpath = os.path.join(self.outmatrixfolder, fname+".csv")

     postagmatrix = IOtools.readcsv(sourcepath, keepindex=True)
     adjectivecounts = postagmatrix.loc[:, "ADJ"].values
     featuredf = pd.DataFrame(adjectivecounts,
                              index=postagmatrix.index.values.tolist(),
                              columns=[fname])
     IOtools.tocsv(featuredf, targetpath, keepindex=True)
开发者ID:dicleoztur,项目名称:subjectivity_detection,代码行数:11,代码来源:learnerFunc.py

示例13: title_abstractwords_presence

# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
 def title_abstractwords_presence(self):
     """Binarize the title term counts and keep only the abstract-word
     columns, writing the result to title-abswordsBINARY.csv in
     self.outmatrixfolder."""
     sourcepath = os.path.join(self.inmatrixfolder, "titletermCOUNT.csv")
     targetpath = os.path.join(self.outmatrixfolder, "title-abswordsBINARY.csv")
     abstractwords = keywordhandler.get_abstractwords()

     countsdf = IOtools.readcsv(sourcepath, keepindex=True)
     counts = countsdf.values
     # in-place binarization: 1 if count > 0 else 0
     np.place(counts, counts > 0, 1)
     presencedf = pd.DataFrame(counts,
                               index=countsdf.index.values.tolist(),
                               columns=countsdf.columns.values.tolist())
     abswordsdf = matrixhelpers.search_words_in_df(presencedf, abstractwords)
     IOtools.tocsv(abswordsdf, targetpath, keepindex=True)
开发者ID:dicleoztur,项目名称:subjectivity_detection,代码行数:12,代码来源:learnerFunc.py

示例14: get_annotation_matrix

# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def get_annotation_matrix(df, outfolder, 
                          noworkerid=True,
                          includehitid=True,
                          nMturkAnnotators=7, nExpertAnnotators=2, 
                          nMturkFinalCol=1, nExperFinalCol=1,
                          remove_comments=False):
    """Reshape raw Mturk annotation rows into one row per (docid, sentenceid).

    Builds columns docid, sentenceid, MA0..MA<n-1>, Mturk_final, EA0..EA<m-1>,
    GOLD. The Mturk answer cells are formatted according to the noworkerid /
    includehitid flags (worker id, hit id, or bare answer); expert columns are
    filled with zero placeholders. Writes the matrix to
    <outfolder>/annotationdf_whitid_wcomments_noworkerid.csv.
    """
    filesentencemap = get_file_sentence_map(df, remove_comments)
    
    colnames = ["docid", "sentenceid"]
    mturkercols = ["MA"+str(i) for i in range(nMturkAnnotators)]
    mturk_final_col = ["Mturk_final"]
    expertcols = ["EA"+str(i) for i in range(nExpertAnnotators)]
    expert_final_col = ["GOLD"]
    
    colnames += mturkercols + mturk_final_col + expertcols + expert_final_col
    
    matrix = []
    for filename, sentences in filesentencemap.iteritems():
        filedf = df[["hitid", "workerid"] + sentences] # annotations for the sentences of one file
        filedf = filedf.dropna(axis=0, how="all", subset=sentences) # clear unrelated rows, those contain no annotations for these sentences
        filedf = reindex_df(filedf)
        filedf = filedf.fillna(value="NaN")
        
        for sentenceid in sentences:
            # strip the answer prefix from both identifiers
            fileid = filename[len(ANSWER_PREFIX)-1:]
            trimmed_sentenceid = sentenceid[len(ANSWER_PREFIX)-1:]
            line = [fileid, trimmed_sentenceid]
            
            mturk_answers = []
            nannotators = filedf.shape[0]
            for i in range(nannotators):
                annotatorid = filedf.loc[i, "workerid"]
                hitid = filedf.loc[i, "hitid"]
                answer = filedf.loc[i, sentenceid]
                # answers are encoded with a trailing "_<value>" suffix
                answer = answer.split("_")[-1]
                
                print "types: ", type(annotatorid), " ", type(answer), "  ", answer 
                # default cell: worker id + answer; overridden by the flags below
                answercell = annotatorid + ANSWERCELL_SEP + answer
                if noworkerid:
                    answercell = answer
                if includehitid and noworkerid:
                    # comment rows keep the worker id; ordinary rows carry the hit id
                    if sentenceid.endswith(comment_suffix):
                        answercell = annotatorid + ANSWERCELL_SEP + answer 
                    #elif sentenceid.endswith(nonsense_suffix):
                        #answercell = answer
                    else:
                        answercell = hitid + ANSWERCELL_SEP + answer
                mturk_answers.append(answercell)
            line.extend(mturk_answers)
            line.extend(np.zeros(len(colnames)-len(line), dtype=int).tolist())  # fill 0's for the yet unknown cols (expert labels)
            matrix.append(line)
    
    annotationdf = pd.DataFrame(matrix, columns=colnames)
    outcsvpath = os.path.join(outfolder, "annotationdf_whitid_wcomments_noworkerid.csv")
    IOtools.tocsv(annotationdf, outcsvpath)
开发者ID:dicleoztur,项目名称:subjectivity_detection,代码行数:57,代码来源:analyse_mturk.py

示例15: insert_texts

# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def insert_texts(df, sourcedf, outfilepath):
    """Copy `text` and gold labels from `sourcedf` into the matching
    (docid, sentenceid) rows of `df` and write the result to `outfilepath`.

    Prints how many source rows matched at least one row in `df`.
    NOTE(review): `df` is mutated in place as well as written to disk.
    """
    sindices = sourcedf.index.tolist()
    c = 0
    for i in sindices:
        docid = sourcedf.loc[i, "docid"]
        sentenceid = sourcedf.loc[i, "sentenceid"]
        label = sourcedf.loc[i, "goldlabel"]
        text = sourcedf.loc[i, "text"]
        df.loc[(df["docid"] == docid) & (df["sentenceid"] == sentenceid), "text"] = text
        df.loc[(df["docid"] == docid) & (df["sentenceid"] == sentenceid), "GOLD"] = label
        # count source rows that matched at least one target row
        if len(df.loc[(df["docid"] == docid) & (df["sentenceid"] == sentenceid), "GOLD"]) != 0:
            c += 1
    print c, " found"
    IOtools.tocsv(df, outfilepath)
开发者ID:dicleoztur,项目名称:subjectivity_detection,代码行数:16,代码来源:analyse_mturk.py


注:本文中的sentimentfinding.IOtools.tocsv方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。