

Python sentimentfinding.IOtools Class Code Examples

This article collects typical usage examples of the Python class sentimentfinding.IOtools. If you are wondering what the IOtools class does, how to use it, or what calling it looks like in practice, the curated class examples below should help.


Fifteen code examples of the IOtools class are shown below, ordered by popularity.
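All of the snippets are excerpts from larger modules, so their imports are omitted, and the project code is Python 2 (print statements, dict.iteritems); version notes follow the affected examples. As a rough, minimal sketch — assuming the sentimentfinding package is on the path and that IOtools exposes exactly the helpers the examples call (the signatures below are inferred from those calls, not from documentation) — the shared setup looks like this:

# Assumed shared setup for the snippets below; a sketch, not part of the originals.
# Signatures are inferred from the example calls, not from IOtools documentation.
import os
import random
import codecs

import nltk
import numpy as np
import pandas as pd

from sentimentfinding import IOtools

df = IOtools.readcsv("scores.csv", keepindex=True)       # CSV -> pandas DataFrame
folders = IOtools.getfoldernames_of_dir("/some/dir")     # immediate subfolder names
files = IOtools.getfilenames_of_dir("/some/dir", removeextension=False)
IOtools.todisc_txt("text body", "/tmp/out.txt")          # write a string to disk
IOtools.tocsv(df, "/tmp/out.csv", keepindex=True)        # DataFrame -> CSV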

Example 1: csv2latextable_algorithm

def csv2latextable_algorithm(inpath, outpath, filename, metricname):
    
    header = "\\begin{table}[h] \n \
\\begin{center} \n \
\\begin{tabular}{|p{9cm}|p{2cm}|p{2cm}|p{2cm}|} \n \
\\hline  \\bf algorithm \\& parameters & \\bf mean "+ metricname +" & \\bf minimum "+ metricname +" & \\bf maximum "+ metricname +"   \\\\ \\hline"
    
    footer = "\\end{tabular} \n \
\\end{center} \n \
\\caption{\\label{alg-"+metricname[:4]+"-stats} Mean, maximum and minimum "+metricname+" results for 27 learning models } \n \
\\end{table}"
    
    ip1 = os.path.join(inpath, filename+".csv")
    df = IOtools.readcsv(ip1, keepindex=True)
    nrows, ncols = df.shape
    rowids = df.index.values.tolist()
    
    out = header+"\n"
    for rowid in rowids:
        featset = rowid[4:]
        featset = "\\verb|"+featset+"|"
        
        out += featset + " & "
        #np.round(a, decimals, out)
        mean = df.loc[rowid, "mean"]
        minval = df.loc[rowid, "min"]   # renamed to avoid shadowing the built-ins min/max
        maxval = df.loc[rowid, "max"]
        stats = map(lambda x : str(round(x, 5)), [mean, minval, maxval])
        statsstr = " & ".join(stats)
        out += statsstr + " \\\\ \\hline " + "\n"
    
    out += footer
    IOtools.todisc_txt(out, os.path.join(outpath, filename+".txt"))
Author: dicleoztur, Project: subjectivity_detection, Lines: 33, Source: latexhelpers.py
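The function reads <filename>.csv from inpath and writes the generated LaTeX table to <filename>.txt under outpath. A hypothetical call, with all paths and names invented for illustration:

# Illustrative only: directory names and the metric are made up for this sketch.
csv2latextable_algorithm(inpath="results/stats",
                         outpath="results/latex",
                         filename="algorithm_scores",  # reads results/stats/algorithm_scores.csv
                         metricname="accuracy")        # writes results/latex/algorithm_scores.txt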

Example 2: csv2latextable_featset

def csv2latextable_featset(inpath, outpath, filename, metricname):
    
    header = "\\begin{table}[h] \n \
\\begin{center} \n \
\\begin{tabular}{|p{5cm}|p{2cm}|p{2cm}|p{2cm}|} \n \
\\hline  \\bf feature-combined dataset name & \\bf mean "+ metricname +" & \\bf minimum "+ metricname +" & \\bf maximum "+ metricname +"   \\\\ \\hline"
    
    footer = "\\end{tabular} \n \
\\end{center} \n \
\\caption{\\label{featset-"+metricname[:4]+"-stats} Mean, maximum and minimum "+metricname+" results for 8 feature-measure-combined datasets } \n \
\\end{table}"
    
    ip1 = os.path.join(inpath, filename+".csv")
    df = IOtools.readcsv(ip1, keepindex=True)
    nrows, ncols = df.shape
    rowids = df.index.values.tolist()
    
    out = header+"\n"
    for rowid in rowids:
        featset = rowid.split("**")[0].strip()
        featset = "\\verb|"+featset+"|"
        out += featset + " & "
        #np.round(a, decimals, out)
        mean = df.loc[rowid, "mean"]
        minval = df.loc[rowid, "min"]   # renamed to avoid shadowing the built-ins min/max
        maxval = df.loc[rowid, "max"]
        stats = map(lambda x : str(round(x, 5)), [mean, minval, maxval])
        statsstr = " & ".join(stats)
        out += statsstr + " \\\\ \\hline " + "\n"
    
    out += footer
    IOtools.todisc_txt(out, os.path.join(outpath, filename+".txt"))
Author: dicleoztur, Project: subjectivity_detection, Lines: 32, Source: latexhelpers.py

Example 3: buildcorpus

def buildcorpus(nfile, ncat, resourcename, path):
    resourcepath = path + os.sep + resourcename
    catnames = IOtools.getfoldernames_of_dir(resourcepath)[:ncat]
    
    featurematrix = []
    doctermmatrix = []
    cfdTermDoc = nltk.ConditionalFreqDist()
    
    for catname in catnames:
        fileids = []
        p = resourcepath + os.sep + catname + os.sep
        fileids.extend(IOtools.getfilenames_of_dir(p, removeextension=False)[:nfile])
        corpus = CorpusFeatures(fileids, resourcename+os.sep+catname, p)
        corpus.getfeatures()
        datapoints = corpus.build_featurematrix()
        for k,v in datapoints.iteritems():
            featurematrix.append([k]+v+[resourcename])
            
        corpus.plot_features()
        
        #doc term matrix
        cfd = corpus.build_termmatrix()
        for fileid in cfd.conditions():
            for term in list(cfd[fileid]):
                cfdTermDoc[fileid].inc(term)
    
    IOtools.todisc_matrix(featurematrix, IOtools.results_rootpath+os.sep+"MATRIX"+str(nfile*ncat)+"texts.txt", mode="a")
Author: dicleoztur, Project: tez0.1v, Lines: 27, Source: dataspace.py
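Note that datapoints.iteritems() and cfdTermDoc[fileid].inc(term) tie this snippet to Python 2 and NLTK 2. Under Python 3 and NLTK 3 the equivalents would presumably be:

# Rough Python 3 / NLTK 3 equivalents of the version-specific calls above:
for k, v in datapoints.items():      # dict.iteritems() is gone in Python 3
    featurematrix.append([k] + v + [resourcename])

cfdTermDoc[fileid][term] += 1        # FreqDist.inc() was removed in NLTK 3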

Example 4: run_copy_from_gold

def run_copy_from_gold():
    maincsvpath = "/home/dicle/Dropbox/ukp/fallacy_detection/mturk_annotations/annotationdf_worker.csv"
    indf = IOtools.readcsv(maincsvpath)
    sourcecsvpath = "/home/dicle/Dropbox/ukp/fallacy_detection/expertandgoldannotations/gold-labels3.csv"
    sourcedf = IOtools.readcsv(sourcecsvpath)
    outfilepath = "/home/dicle/Dropbox/ukp/fallacy_detection/mturk_annotations/annotationdf_wtexts_wmajority_worker.csv"
    insert_texts(indf, sourcedf, outfilepath)    
Author: dicleoztur, Project: subjectivity_detection, Lines: 7, Source: analyse_mturk.py

Example 5: get_allfolds_bigdf

def get_allfolds_bigdf(foldrootpath, annottype, featset, labelunion):
    
    bigdf = pd.DataFrame(columns=metaexperimentation.performanceheader)
    
    folds = IOtools.getfoldernames_of_dir(foldrootpath)
                        
    for foldno in folds:
        p1 = os.path.join(foldrootpath, foldno)
                                    
        scorecsvfilepath = p1 + os.sep + metaexperimentation.scorefilename+".csv"
        scorecsvfile = IOtools.readcsv(scorecsvfilepath)
        
        print " scorefile ",scorecsvfilepath,"  ",scorecsvfile.shape
        
        #rankdf = matrixhelpers.get_first_N_rows(scorecsvfile, int(N / 2), metricnames, ascend=takeworst)
        rankdf = scorecsvfile.copy()
        rankdf["labelunion"] = labelunion
        rankdf["featureset"] = featset 
        rankdf["annottype"] = annottype
        rankdf["fold"] = foldno
        bigdf = bigdf.append(rankdf)
        #dflist.append(rankdf)

    print "FOLDROOTPATH ",foldrootpath
    outcsvpath = os.path.join(foldrootpath, "bigdf.csv")
    IOtools.tocsv(bigdf, outcsvpath, False)
Author: dicleoztur, Project: tez0.1v, Lines: 27, Source: performance_evaluation_crossval.py
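DataFrame.append, used to grow bigdf above, was deprecated in pandas 1.4 and removed in 2.0. On a current pandas, a sketch of the same accumulation collects the per-fold frames and concatenates once:

# Sketch of the fold loop under pandas >= 2.0, where DataFrame.append no longer exists.
frames = []
for foldno in folds:
    scorecsvfilepath = os.path.join(foldrootpath, foldno,
                                    metaexperimentation.scorefilename + ".csv")
    rankdf = IOtools.readcsv(scorecsvfilepath).copy()
    rankdf["fold"] = foldno          # plus labelunion / featureset / annottype, as above
    frames.append(rankdf)
bigdf = pd.concat(frames)            # one concatenation instead of repeated appends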

Example 6: prepare_experiment

    def prepare_experiment(self, Xpath, ypath, erootpath, labelnames=None):
        
        self.datapath = Xpath
        self.labelpath = ypath
        
        #if erootpath:
        self.set_score_folder(erootpath)
        
        yvector = IOtools.readcsv(ypath, True)
        self.ylabels = yvector.answer.values
        yvals = self.ylabels.copy().tolist()
        #print "y vals ",yvals
        #print "vect ", self.ylabels
        if labelnames is None:
            labelnames = ["class "+str(i) for i in list(set(yvals))]

        
        instanceids = yvector.index.values.tolist()
        datadf = IOtools.readcsv(Xpath, keepindex=True)
        datadf = datadf.loc[instanceids, :]
              
        self.X = datadf.values   
        self.X[np.isnan(self.X)] = 0
        self.X[np.isinf(self.X)] = 0
        
        '''  do it inside models
        if normalize:
            self.X = preprocessing.normalize(self.X, axis=0)
        '''
        '''  can't apply standardization as it results in negative entries in the matrix, ... '''
Author: dicleoztur, Project: subjectivity_detection, Lines: 30, Source: learner.py
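The two boolean-mask assignments that zero out NaN and infinite entries in self.X can be collapsed into a single call; assuming NumPy 1.17 or newer, an equivalent would be:

# Equivalent one-liner for the NaN/inf cleanup above (NumPy >= 1.17):
self.X = np.nan_to_num(self.X, nan=0.0, posinf=0.0, neginf=0.0)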

Example 7: getwordsandlemmasfromfile

def getwordsandlemmasfromfile():
    rootpath = "/home/dicle/Dicle/Tez/geziyakurdiproject/"
    
    corpuspath = rootpath + os.sep + "texts/"
    outwordspath = rootpath + os.sep + "weightedwords/"
    
    fileids = IOtools.getfilenames_of_dir(corpuspath, removeextension=False)

    for fileid in fileids:
        txt = texter.readtxtfile(corpuspath+os.sep+fileid)
        
        marker = "Haziran 2013"
        mark = txt.find(marker)    # skip metadata
        txt = txt[mark+len(marker):]
        
        words = texter.getwords(txt)
        lemmatuples = SAKsParser.findrootsinlexicon(words)
        roots = [root for _,root,_ in lemmatuples]
        
        fdwords = nltk.FreqDist(words)
        
        fdroots = nltk.FreqDist(roots)
        
        weightedwords = [word+"\t"+str(fdwords[word]) for word in list(fdwords)]
        weightedroots = [root+"\t"+str(fdroots[root]) for root in list(fdroots)]
               
        IOtools.todisc_list(outwordspath+os.sep+"lemma"+os.sep+fileid, weightedwords)
        IOtools.todisc_list(outwordspath+os.sep+"root"+os.sep+fileid, weightedroots)
Author: dicleoztur, Project: subjectivity_detection, Lines: 30, Source: articleanalysis.py

Example 8: report_results

    def report_results(self):
        self.compute_precision()
        self.compute_recall()
        self.compute_fmeasure()
        self.compute_accuracy()

        IOtools.todisc_matrix(self.confusionmatrix, self.folder+os.sep+self.experimentname+".confmat")

        f = codecs.open(self.folder+os.sep+self.experimentname+".results", "a", encoding='utf8')
        # write the report line by line so the whole string is never held in memory
        header = "\t" + "\t".join(self.catmetrics.keys()) + "\n"
        f.write(header)

        labelencoding, _ = classfhelpers.classlabelindicing(self.classes)    # labeldecoding contains indices
        for c in self.classes:
            i = labelencoding[c]
            line = []
            line.append(c)
            for metricname in self.catmetrics.keys():
                line.append(self.catmetrics[metricname][i])
            line = map(lambda x : str(x), line)
            outstr = "\t".join(line) + "\n"
            f.write(outstr)
        f.write("\nAccuracy: "+str(self.accuracy))
        f.close()
Author: dicleoztur, Project: subjectivity_detection, Lines: 27, Source: clsshell.py
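On Python 3 the codecs.open call is unnecessary, since the built-in open accepts an encoding directly:

# Python 3 equivalent of the codecs.open call above:
f = open(self.folder + os.sep + self.experimentname + ".results", "a", encoding="utf8")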

Example 9: get_user_text_distributions

    def get_user_text_distributions(self):
        #users = range(1, self.ncoders+1)

        # 1- get single-annotation list
        # 2- get double-annotation list

        usertextassignment = {}
        singleannot_distribution = Selection()
        singleannot_distribution.initialize(self.months, self.resources, self.cats)
        for i,user in enumerate(self.coders):
            oneuser_distribution, assignment = self.justice_selection(self.nsingle)   # will return textids as (newsid-res-cat) # handle selected_texts here
            usertextassignment[i] = assignment
            singleannot_distribution.update_selection(oneuser_distribution)

        # record user assignments and the distribution
        #self.singles_jsonpath = os.path.join(self.outfolder, "singleannotation_assignments.txt")
        IOtools.todisc_json(self.singles_jsonpath, usertextassignment, ind=5)
        singleannot_distribution.todisc(os.path.join(self.outfolder, "singleannotation_distribution.txt"))

        textassignments = {}
        # NOTE: the count of double-annotatable texts had a bug here; it should be (self.ncoders/2)*self.noverlaps.
        #doubleannot_distribution, textassignments = self.justice_selection(self.ncoders * self.noverlaps)
        doubleannot_distribution, textassignments = self.justice_selection(int(self.ncoders / 2.0) * self.noverlaps)

        #self.doubles_jsonpath = os.path.join(self.outfolder, "doubleannotation_assignments.txt")
        IOtools.todisc_json(self.doubles_jsonpath, textassignments)
        doubleannot_distribution.todisc(os.path.join(self.outfolder, "doubleannotation_distribution.txt"))
Author: dicleoztur, Project: subjectivity_detection, Lines: 28, Source: annotationbuilder_old.py

Example 10: diff_word_lists

def diff_word_lists(list1, list2, outdir, outfilename):
    l = list(set(list1) - set(list2))
    IOtools.todisc_list(outdir+os.sep+outfilename+".txt", l)
    
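    # note: l holds unique items (a set difference), so every count in the FreqDist below is 1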
    fdist = nltk.FreqDist(l)
    IOtools.todisc_freqdist(outdir+os.sep+"weighted-"+outfilename+".txt", fdist)
    return l
Author: dicleoztur, Project: subjectivity_detection, Lines: 7, Source: articleanalysis.py

Example 11: evaluate_crosscorpus

def evaluate_crosscorpus(scoresroot):
    
    featclasses = IOtools.getfoldernames_of_dir(scoresroot)
    
    for featureclass in featclasses:
        
        p1 = os.path.join(scoresroot, featureclass)
        lunions = IOtools.getfoldernames_of_dir(p1)
        
        for labelunion in lunions:
            
            p2 = os.path.join(p1, labelunion)

            testcases = IOtools.getfoldernames_of_dir(p2)
            
            for testcase in testcases:
                
                p3 = os.path.join(p2, testcase)
                traincases = IOtools.getfoldernames_of_dir(p3)
                
                for traincase in traincases:
                    
                    p4 = os.path.join(p3, traincase)   # foldspath
                    get_allfolds_bigdf(foldrootpath=p4, 
                                       annottype=testcase + " ** "+traincase, 
                                       featset=featureclass, 
                                       labelunion=labelunion)
                    
                    get_fold_averages(p4)
Author: dicleoztur, Project: tez0.1v, Lines: 29, Source: performance_evaluation_crossval.py

Example 12: get_randomly_annotated_set

def get_randomly_annotated_set(incsvfilename, outcsvfilename,
                               incsvfolder=metacorpus.userannotatedpath, outcsvfolder=metacorpus.randomannotatedpath, 
                               randomchoicevalues=metacorpus.subjectivity_label_values.keys()):
    df = IOtools.readcsv(os.path.join(incsvfolder, incsvfilename))  # df cols: questionname,userid,answer
    randomdf= df.copy()
    numofrows, _ = randomdf.values.shape
    subjvalues = randomchoicevalues
    
    randomanswers = [random.choice(subjvalues) for _ in range(numofrows)]
    randomdf.loc[:, "answer"] = randomanswers
    
    # extra: assign 5 of the rows the value 5 for the answer 'no idea, ambiguous'
    notknowingrows = random.sample(range(numofrows), 5)
    
    '''
    for _ in range(5):
        randindex = random.randint(0, numofrows-1)
        while randindex in notknowingrows:
            randindex = random.randint(0, numofrows-1)
        notknowingrows.append(randindex)
    '''        
    #notknowingrows = [random.randint(0, numofrows-1) for _ in range(5)]  # be careful with this 5 number it is subject to change for the sake of statistical validity
    randomdf.loc[notknowingrows, "answer"] = 5
    
    IOtools.tocsv(randomdf, os.path.join(outcsvfolder, outcsvfilename))
Author: dicleoztur, Project: subjectivity_detection, Lines: 25, Source: goldsetbuilder.py
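One Python 2 dependency worth noting: the default randomchoicevalues=metacorpus.subjectivity_label_values.keys() only works where keys() returns a list. On Python 3 it returns a view, which random.choice cannot index, so the keys would need materializing first:

# Python 3: random.choice needs an indexable sequence, so materialize the keys view.
randomchoicevalues = list(metacorpus.subjectivity_label_values.keys())
randomanswers = [random.choice(randomchoicevalues) for _ in range(numofrows)]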

Example 13: metadata_tabular

def metadata_tabular():
    rpath = "/home/dicle/Dicle/Tez/geziyakurdiproject/corpus2/ldatests22Temmuz/wordletest/words/temp/"
    metadf = pd.read_csv(rpath+"/metadocs.csv", index_col=None, sep="\t")
    
    print metadf.loc[0,"Author"]
    metadf = metadf.sort(["Polarity", "Date", "Author"], ascending=[False, True, True])
    v = metadf.iloc[0,:]
    print v.loc["Author"],v.loc["Resource"]
    
    header = "\\begin{tabular}{l  | c | c | c | c } \n \
kategori & yazar & başlık & tarih & yayın \\\\ \n \
\\hline \\hline \n"

    end = "\\end{tabular}"
    outltx = ""
    numofdocs, fields = metadf.shape
    for i in range(numofdocs):
        row = metadf.iloc[i,:]
        cat = row.loc["Polarity"]
        cat = "\\textbf{"+cat+"}"
        author = row.loc["Author"]
        title = row.loc["Title"]
        link = row.loc["Link"]
        date = row.loc["Date"]
        resource = row.loc["Resource"]
        
        title = "\\href{"+link+"}{"+title+"}"
        date = "\\textit{"+date+"}"
        resource = "@"+resource
        
        s = " & ".join([cat, author, title, date, resource])
        outltx = outltx + s + "\\\\ \n \\hline \n"
    
    outltx = header + outltx + end
    IOtools.todisc_txt(outltx, rpath+"docswordle_tableLaTeX.txt")
Author: dicleoztur, Project: subjectivity_detection, Lines: 35, Source: preparelatex.py
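DataFrame.sort, used for the three-key ordering above, was removed in pandas 0.20; on a current pandas the same sort is spelled sort_values:

# Current-pandas equivalent of the metadf.sort(...) call above:
metadf = metadf.sort_values(["Polarity", "Date", "Author"],
                            ascending=[False, True, True])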

Example 14: add_resource_label

def add_resource_label(matrixpath, datasetname, replacelabel=False, headers=True):
    matrixlines = IOtools.readtextlines(matrixpath)  # 1st item=fileid, lastitem=filecat.
    
    newmatrix = []
    
    if headers:
        matrixlines = matrixlines[2:]
    
    for instance in matrixlines:
        items = instance.split()
        fileid = items[0]
        print instance,
        path = datapath+os.sep+datasetname
        foldernames = IOtools.getfoldernames_of_dir(path)   # path computed above
        #print foldernames
        for folder in foldernames:
            allfileids = IOtools.getfilenames_of_dir(path+os.sep+folder, removeextension=False)
            #print allfileids
            if fileid in allfileids:
                newspath = path+os.sep+folder+os.sep+fileid
                resourcename = texter.getnewsmetadata(newspath, ["resource"])["resource"]
                #print "## ",resourcename,"  ",type(instance),"  ~~ ",instance
                
                if replacelabel: items = items[:-1]
                newmatrix.append(items +[resourcename])
                break
    
    return newmatrix
Author: dicleoztur, Project: subjectivity_detection, Lines: 28, Source: matrixhandler.py

Example 15: get_AllObj_AllSubj_class

def get_AllObj_AllSubj_class(originallabelspath, outfolder, in_NC=5):
    
    out_NC = 2
    if in_NC <= out_NC:
        return

    labeldf = IOtools.readcsv(originallabelspath, keepindex=True)
    
    outpath = os.path.join(ensure_unionclass_dir(outfolder, "ALLobj-ALLsubj", out_NC), metacorpus.labelsfilename + ".csv")
    
    labelvector = labeldf.values
    labelvector = np.array(labelvector, dtype=object)
    
    # replace values  12->"sub"; 34->"obj"
    labelvector[labelvector == 1] = 12
    labelvector[labelvector == 2] = 12
    labelvector[labelvector == 3] = 34
    labelvector[labelvector == 4] = 34
    
    for i,_ in enumerate(labelvector):
        if labelvector[i] == 5:
            labelvector[i] = random.choice([12, 34])
    
    twolabeldf = pd.DataFrame(labelvector, columns=labeldf.columns.values.tolist(), index=labeldf.index.values.tolist())
    IOtools.tocsv(twolabeldf, outpath, keepindex=True)
Author: dicleoztur, Project: subjectivity_detection, Lines: 25, Source: arrange_class_unions.py
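As a sketch of an alternative, the element-wise relabelling can be done with pandas' replace, with a vectorized random fill for the ambiguous label 5 instead of the Python loop (behavior matches up to randomness):

# Sketch: the same 5-class -> 2-class union via DataFrame.replace and mask.
twolabeldf = labeldf.replace({1: 12, 2: 12, 3: 34, 4: 34})
ambiguous = twolabeldf == 5          # label 5 ('no idea') gets a random class
twolabeldf = twolabeldf.mask(ambiguous,
                             np.random.choice([12, 34], size=twolabeldf.shape))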


Note: the sentimentfinding.IOtools class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from community-contributed open-source projects, and copyright in the source code remains with the original authors; consult each project's License before distributing or using the code. Do not republish without permission.