

Python IOtools.todisc_list Method Code Examples

This article collects typical usage examples of the Python method sentimentfinding.IOtools.todisc_list. If you have been wondering how to use IOtools.todisc_list, or looking for working examples of it, the curated code samples below may help. You can also explore further usage examples of the containing module, sentimentfinding.IOtools.


The following shows 14 code examples of the IOtools.todisc_list method, sorted by popularity by default. You can upvote the examples you find useful; your ratings help the system recommend better Python code examples.
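
The sentimentfinding.IOtools module itself is not reproduced on this page. Judging from the call sites in the examples below (a file path plus a list of strings, occasionally with mode='a'), todisc_list evidently writes each list element to the given file, one element per line. The following is a minimal sketch of that assumed behavior; the parameter names, default mode, and newline handling are inferences from the call sites, not the original source:

# Hypothetical reconstruction -- inferred from the call sites below,
# not taken from the actual sentimentfinding.IOtools source.
def todisc_list(filepath, txtlist, mode="w"):
    # Write each element of txtlist to filepath, one per line.
    with open(filepath, mode) as f:
        for item in txtlist:
            f.write(str(item) + "\n")

Under this reading, a call such as IOtools.todisc_list(rootdir+os.sep+"brands.txt", brands) in Example 4 produces one brand per line, and mode='a' in Example 8 appends across folds instead of overwriting.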

Example 1: getwordsandlemmasfromfile

# Required module: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
# This example also uses os, nltk, and the project-local modules texter and SAKsParser.
def getwordsandlemmasfromfile():
    rootpath = "/home/dicle/Dicle/Tez/geziyakurdiproject/"
    
    corpuspath = rootpath + os.sep + "texts/"
    outwordspath = rootpath + os.sep + "weightedwords/"
    
    fileids = IOtools.getfilenames_of_dir(corpuspath, removeextension=False)

    for fileid in fileids:
        txt = texter.readtxtfile(corpuspath+os.sep+fileid)
        
        marker = "Haziran 2013"
        mark = txt.find(marker)    # skip metadata
        txt = txt[mark+len(marker):]
        
        words = texter.getwords(txt)
        lemmatuples = SAKsParser.findrootsinlexicon(words)
        roots = [root for _,root,_ in lemmatuples]
        
        fdwords = nltk.FreqDist(words)
        
        fdroots = nltk.FreqDist(roots)
        
        weightedwords = [word+"\t"+str(fdwords[word]) for word in list(fdwords)]
        weightedroots = [root+"\t"+str(fdroots[root]) for root in list(fdroots)]
               
        IOtools.todisc_list(outwordspath+os.sep+"lemma"+os.sep+fileid, weightedwords)
        IOtools.todisc_list(outwordspath+os.sep+"root"+os.sep+fileid, weightedroots)
Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 32, Source: articleanalysis.py

Example 2: reportresults

# Required module: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
# This method also uses os, sklearn.metrics (imported as metrics), and the project module metaexperimentation.
 def reportresults(self, ytrue, ypred, experimentname):
     
     '''
     precision, recall, f1score, _ = metrics.precision_recall_fscore_support(ytrue, ypred)     
     print precision, recall, f1score
     '''
     #print ytrue
     #print ypred     
        
     precision = metrics.precision_score(ytrue, ypred, pos_label=None, average="macro")
     recall = metrics.recall_score(ytrue, ypred, pos_label=None, average="macro")
     f1score = metrics.f1_score(ytrue, ypred, pos_label=None, average="macro")
     accuracy = metrics.accuracy_score(ytrue, ypred)
     
     scoreline = metaexperimentation.csvsep.join(map(str, [experimentname, precision, recall, f1score, accuracy]))
     IOtools.todisc_txt("\n"+scoreline, self.scorefilepath, mode="a")
     
     modelscorereportpath = os.path.join(self.experimentrootpath, experimentname+".txt")   
     try:
         scorereportstr = metrics.classification_report(ytrue, ypred, target_names=self.labelnames)
     except:
         scorereportstr = "zero division error\n"
     IOtools.todisc_txt(scorereportstr, modelscorereportpath)
     
     # record instances
     path = os.path.join(self.experimentrootpath, "instances", experimentname+".csv")
     iheader = ["ytrue\typred"]
     instances = [str(true)+"\t"+str(pred) for (true, pred) in zip(ytrue, ypred)]
     IOtools.todisc_list(path, iheader+instances)
Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 31, Source: learner.py

Example 3: diff_word_lists

# Required module: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
# This example also uses os and nltk.
def diff_word_lists(list1, list2, outdir, outfilename):
    l = list(set(list1) - set(list2))
    IOtools.todisc_list(outdir+os.sep+outfilename+".txt", l)
    
    fdist = nltk.FreqDist(l)
    IOtools.todisc_freqdist(outdir+os.sep+"weighted-"+outfilename+".txt", fdist)
    return l
Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 9, Source: articleanalysis.py

Example 4: get_brands

# Required module: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
# rootdir is a module-level path defined elsewhere in retrieveimages.py; this example also uses os.
def get_brands(fname):
    with open(fname, "r") as f:
        lines = f.readlines()
    brands = []
    for line in lines:
        brand = line.split("_")[0]
        brands.append(brand.strip())
    
    brands = list(set(brands))
    IOtools.todisc_list(rootdir+os.sep+"brands.txt", brands)
    return brands
Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 13, Source: retrieveimages.py

Example 5: get_most_similar_pairs

# Required module: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
# numericutils is a project-local helper module; this method also uses os.
 def get_most_similar_pairs(self, simdf, filemarker):
     docs = simdf.index.values.tolist()
     similarpairs = []
     output = []
     for doc in docs:
         maxval, maxind = numericutils.get2ndmax(simdf.loc[doc].values)
         doc2 = simdf.columns.values[maxind]
         similarpairs.append((doc, doc2, maxval))
         output.append(doc+" # "+doc2+" : "+str(maxval))
     
     IOtools.todisc_list(self.recordpath+os.sep+"similarpairs"+filemarker+".txt", output)
     return similarpairs
Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 14, Source: documentsimilarity.py

Example 6: write_docterm_weight

# Required module: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
# This method expects a pandas DataFrame (df) and also uses os.
 def write_docterm_weight(self, df, rootpath):
     docs = df.index.values.tolist()
     words = df.columns.values.tolist()
     
     for doc in docs:
         docid_termpairs = []
         for word in words:
             val = df.loc[doc, word]
             if val > 0.0:
                 output = word + "\t" + str(val)
                 docid_termpairs.append(output)
         IOtools.todisc_list(rootpath+os.sep+doc, docid_termpairs)
Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 14, Source: documentsimilarity.py

Example 7: merge_word_lists

# Required module: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
# This example also uses os and nltk.
def merge_word_lists(indirectory, outdirectory, outfilename):
    fileids = IOtools.getfilenames_of_dir(indirectory, removeextension=False)

    allwords = []
    for fileid in fileids:
        words = IOtools.readtextlines(indirectory+os.sep+fileid)
        allwords.extend(words)
    IOtools.todisc_list(outdirectory+os.sep+outfilename+".txt", allwords)
    
    fdist = nltk.FreqDist(allwords)
    IOtools.todisc_freqdist(outdirectory+os.sep+"weighted-"+outfilename+".txt", fdist)
Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 15, Source: articleanalysis.py

Example 8: crawl_folds_for_sets

# Required module: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
# This example also uses os.
def crawl_folds_for_sets(foldpath, outrootpath):
    folds = IOtools.getfoldernames_of_dir(foldpath)  
    items = {"trainitems.csv": [], "testitems.csv": []}
    for fold in folds:
        p1 = os.path.join(foldpath, fold)
        for fname in items.keys():
            p2 = os.path.join(p1, fname)
            df = IOtools.readcsv(p2, keepindex=True)
            fileids = df.index.values.tolist()
            outpath = os.path.join(outrootpath, "all-"+fname[:-4]+".txt")
            IOtools.todisc_list(outpath, fileids, mode='a')
            items[fname].extend(fileids)
    return items
Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 17, Source: shell.py

Example 9: parseXML_phraseslexicon

# Required module: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
# Also needed: import os and xml.etree.ElementTree as ET; deyimDOMpath is a constant defined elsewhere in parsexml.py.
def parseXML_phraseslexicon(xmlinfilepath, txtoutfilepath):
    xmlfnames = IOtools.getfilenames_of_dir(xmlinfilepath, removeextension=False)   
    statsstr = "letter\tnumofphrases\n"    # tab-separated, matching the data rows below
    for fname in xmlfnames:
        letter = fname.split("_")[-1][:-4]    # each fname is of the form "ADB_letter.xml"
        print(fname)
        path = xmlinfilepath + os.sep + fname
        tree = ET.parse(path)
        lexiconroot = tree.getroot()
        names = lexiconroot.findall(deyimDOMpath)
        phrases = []
        for name in names:
            phrase = name.text
            phrases.append(phrase.strip().lower())
        outpath = txtoutfilepath + os.sep + letter + ".txt"
        IOtools.todisc_list(outpath, phrases)  
        statsstr += letter+"\t"+str(len(phrases))+"\n"
    IOtools.todisc_txt(statsstr, txtoutfilepath+os.sep+"originalstats.txt")
Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 20, Source: parsexml.py

Example 10: intersect_word_lists

# Required module: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
# This example also uses os and nltk.
def intersect_word_lists(list1, list2, outdir, outfilename):
    intersect = []
    for w1 in list1:
        if w1 in list2:
            intersect.append(w1)
    IOtools.todisc_list(outdir+os.sep+outfilename+".txt", intersect)
    
    fdist = nltk.FreqDist(intersect)
    fdist2 = nltk.FreqDist(list2)
    for item in fdist2:
        fdist[item] += fdist2[item]    # FreqDist.inc() is NLTK 2-era; in NLTK 3, FreqDist is a Counter
    IOtools.todisc_freqdist(outdir+os.sep+"weighted-"+outfilename+".txt", fdist)
    return intersect
Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 21, Source: articleanalysis.py

Example 11: get_jointlexicon

# Required module: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
# texter is a project-local module; tr_translations, mpqapath, and jointlexiconpath are module-level paths.
def get_jointlexicon():
    engtr = texter.readtextlines(tr_translations)
    mpqa = texter.readtextlines(mpqapath)
    
    newclues = []
    for tr,en in zip(engtr, mpqa):
        # edit original en words from mpqa
        items = en.split()
        items = [x for x in items if not x.startswith('len') and not x.startswith('stem') and len(x) > 1]
        print(en, " ###### ", items)
        items = [item.split("=")[1] for item in items]
        enclue = " ".join(items)
        
        # edit tr words
        trword = tr.split(":")[1]
        trword = trword.strip().lower()
        s = enclue + "\t" + trword
        newclues.append(s)
    IOtools.todisc_list(jointlexiconpath, newclues)
Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 21, Source: lexiconhandler.py

Example 12: yapmak

# Required module: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
 
    # Fragment of top-level script code from parsexml.py (presumably its
    # __main__ block); editphrase() and phrasestxtout are defined elsewhere
    # in that file.
    #IOtools.mergetxtfiles(phrasestxtout, phrasestxtout+os.sep+"tr_phrases.txt")

    #s = 'dedigini (veya soyledigini) yapmak (veya etmek)'
    #s = "(birine) dedigini yapmak (veya soylemek) (kendi)"
    s = "dedigini yapmak"
    print(editphrase(s))

    allphrases = IOtools.readtextlines(phrasestxtout+os.sep+"tr_phrases.txt")
    refinedphrases = []
    for p in allphrases:
        refinedphrases.append(editphrase(p))

    IOtools.todisc_list(phrasestxtout+os.sep+"tr_phrasesEDIT.txt", refinedphrases)

    '''
    fname = "ADB_a.xml"

    path = phrasespath + os.sep + fname
    tree = ET.parse(path)
    root = tree.getroot()

    print root.findtext("name")
    names = root.findall("entry/name")

    for name in names:
        print name.text
    '''
Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 32, Source: parsexml.py

Example 13: lemmatize_lexicon

# Required module: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
# texter and SAKsParser are project-local modules; trpath is a module-level path.
def lemmatize_lexicon(fname):
    words = texter.readtextlines(trpath+os.sep+fname)
    lemmata = SAKsParser.lemmatize_lexicon(words)
    IOtools.todisc_list(trpath+os.sep+"lemmatized_"+fname, lemmata)
Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 6, Source: lexiconhandler.py

Example 14: categorize_lexicon

# Required module: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
# texter and SAKsParser are project-local modules; trpath is a module-level path.
def categorize_lexicon():
    lexicon = texter.readtextlines(jointlexiconpath) 
    fname = ""
    
    for line in lexicon:
        items = line.split()
        t = items[0]
        p = items[3]
        fname = t+"_"+p+".txt"
        s = items[1]+" "+items[2]+"\t"+items[4]+"\n"
        IOtools.txttodisc_append(s, trpath+os.sep+fname) 

def lemmatize_lexicon(fname):
    words = texter.readtextlines(trpath+os.sep+fname)
    lemmata = SAKsParser.lemmatize_lexicon(words)
    IOtools.todisc_list(trpath+os.sep+"lemmatized_"+fname, lemmata)
               


if __name__ == "__main__":
    #get_jointlexicon()
    #categorize_lexicon()
    #lemmatize_lexicon("tr_strongsubjective.txt")
    
    l = texter.readtextlines(trpath+"/tr_strongsubjective.txt")
    l = list(set(l))
    l.sort()
    IOtools.todisc_list(trpath+"/tr_strongsubjective_vv.txt", l)
Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 31, Source: lexiconhandler.py


Note: The sentimentfinding.IOtools.todisc_list method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors. Refer to the corresponding project's License before distributing or using the code; do not reproduce without permission.