This page collects typical usage examples of the Python method sentimentfinding.IOtools.todisc_list. If you are unsure what IOtools.todisc_list does or how to use it, the curated examples below should help. You can also explore the containing module, sentimentfinding.IOtools, for more.
The following shows 14 code examples of the IOtools.todisc_list method, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python examples.
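IOtools itself is not reproduced on this page, so the following is only a plausible reconstruction: judging by the calls in the examples below, todisc_list writes a list of strings to a file, one item per line, and accepts an optional file mode (Example 8 passes mode='a' to append). The codecs/utf-8 choice is an assumption, made because these projects handle Turkish text.

import codecs

def todisc_list(path, lst, mode="w"):
    # Assumed behavior: one list item per line; mode="a" appends (see Example 8).
    with codecs.open(path, mode, encoding="utf-8") as f:
        for item in lst:
            f.write(item + "\n")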
Example 1: getwordsandlemmasfromfile
# Required import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
def getwordsandlemmasfromfile():
    rootpath = "/home/dicle/Dicle/Tez/geziyakurdiproject/"
    corpuspath = rootpath + os.sep + "texts/"
    outwordspath = rootpath + os.sep + "weightedwords/"
    fileids = IOtools.getfilenames_of_dir(corpuspath, removeextension=False)
    for fileid in fileids:
        txt = texter.readtxtfile(corpuspath + os.sep + fileid)
        marker = "Haziran 2013"
        mark = txt.find(marker)   # skip the metadata preceding the marker
        txt = txt[mark + len(marker):]
        words = texter.getwords(txt)
        lemmatuples = SAKsParser.findrootsinlexicon(words)
        roots = [root for _, root, _ in lemmatuples]
        fdwords = nltk.FreqDist(words)
        fdroots = nltk.FreqDist(roots)
        weightedwords = [word + "\t" + str(fdwords[word]) for word in list(fdwords)]
        weightedroots = [root + "\t" + str(fdroots[root]) for root in list(fdroots)]
        IOtools.todisc_list(outwordspath + os.sep + "lemma" + os.sep + fileid, weightedwords)
        IOtools.todisc_list(outwordspath + os.sep + "root" + os.sep + fileid, weightedroots)
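For reference, the weighting step above boils down to NLTK frequency counting; a tiny self-contained illustration with made-up tokens:

import nltk

words = ["gezi", "park", "gezi", "direnis"]
fd = nltk.FreqDist(words)
# Same "word<TAB>count" lines that todisc_list receives above.
weighted = [w + "\t" + str(fd[w]) for w in list(fd)]
print(weighted)   # e.g. ['gezi\t2', 'park\t1', 'direnis\t1']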
Example 2: reportresults
# Required import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
def reportresults(self, ytrue, ypred, experimentname):
    '''
    precision, recall, f1score, _ = metrics.precision_recall_fscore_support(ytrue, ypred)
    print precision, recall, f1score
    '''
    precision = metrics.precision_score(ytrue, ypred, pos_label=None, average="macro")
    recall = metrics.recall_score(ytrue, ypred, pos_label=None, average="macro")
    f1score = metrics.f1_score(ytrue, ypred, pos_label=None, average="macro")
    accuracy = metrics.accuracy_score(ytrue, ypred)
    scoreline = metaexperimentation.csvsep.join(map(str, [experimentname, precision, recall, f1score, accuracy]))
    IOtools.todisc_txt("\n" + scoreline, self.scorefilepath, mode="a")

    modelscorereportpath = os.path.join(self.experimentrootpath, experimentname + ".txt")
    try:
        scorereportstr = metrics.classification_report(ytrue, ypred, target_names=self.labelnames)
    except Exception:
        scorereportstr = "zero division error\n"
    IOtools.todisc_txt(scorereportstr, modelscorereportpath)

    # record instances
    path = os.path.join(self.experimentrootpath, "instances", experimentname + ".csv")
    iheader = ["ytrue\typred"]
    instances = [str(true) + "\t" + str(pred) for (true, pred) in zip(ytrue, ypred)]
    IOtools.todisc_list(path, iheader + instances)
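For context, the macro-averaged scores the method records can be reproduced with sklearn alone; a minimal sketch with toy labels (invented for illustration):

from sklearn import metrics

ytrue = [0, 1, 2, 2, 1, 0]
ypred = [0, 2, 2, 2, 0, 0]

# Macro averaging computes each metric per class, then takes the unweighted mean,
# so minority classes count as much as majority ones.
print(metrics.precision_score(ytrue, ypred, average="macro"))
print(metrics.recall_score(ytrue, ypred, average="macro"))
print(metrics.f1_score(ytrue, ypred, average="macro"))
print(metrics.accuracy_score(ytrue, ypred))
print(metrics.classification_report(ytrue, ypred, target_names=["neg", "neu", "pos"]))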
Example 3: diff_word_lists
# Required import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
def diff_word_lists(list1, list2, outdir, outfilename):
    l = list(set(list1) - set(list2))
    IOtools.todisc_list(outdir + os.sep + outfilename + ".txt", l)
    fdist = nltk.FreqDist(l)   # l is deduplicated, so every frequency here is 1
    IOtools.todisc_freqdist(outdir + os.sep + "weighted-" + outfilename + ".txt", fdist)
    return l
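Note that the set difference collapses duplicates, so every surviving word occurs exactly once; a quick illustration with invented words:

list1 = ["gezi", "park", "gezi", "polis"]
list2 = ["polis"]
l = list(set(list1) - set(list2))
print(sorted(l))   # ['gezi', 'park']: the repeated "gezi" is collapsed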
Example 4: get_brands
# Required import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
def get_brands(fname):
    with open(fname, "r") as f:
        lines = f.readlines()
    brands = []
    for line in lines:
        brand = line.split("_")[0]   # lines look like "<brand>_..."
        brands.append(brand.strip())
    brands = list(set(brands))
    IOtools.todisc_list(rootdir + os.sep + "brands.txt", brands)
    return brands
Example 5: get_most_similar_pairs
# Required import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
def get_most_similar_pairs(self, simdf, filemarker):
    docs = simdf.index.values.tolist()
    similarpairs = []
    output = []
    for doc in docs:
        # take the 2nd-largest value: the largest is the document's own self-similarity
        maxval, maxind = numericutils.get2ndmax(simdf.loc[doc].values)
        doc2 = simdf.columns.values[maxind]
        similarpairs.append((doc, doc2, maxval))
        output.append(doc + " # " + doc2 + " : " + str(maxval))
    IOtools.todisc_list(self.recordpath + os.sep + "similarpairs" + filemarker + ".txt", output)
    return similarpairs
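numericutils.get2ndmax is not shown on this page; a hypothetical sketch, assuming it returns the second-largest value of a 1-D array together with its index (the largest being the self-similarity on the diagonal):

import numpy as np

def get2ndmax(values):
    # Hypothetical reconstruction: argsort ascending, [-2] is the runner-up,
    # i.e. the most similar *other* document.
    order = np.argsort(values)
    maxind = order[-2]
    return values[maxind], maxind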
Example 6: write_docterm_weight
# Required import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
def write_docterm_weight(self, df, rootpath):
    docs = df.index.values.tolist()
    words = df.columns.values.tolist()
    for doc in docs:
        docid_termpairs = []
        for word in words:
            val = df.loc[doc, word]
            if val > 0.0:   # keep the output sparse: skip zero weights
                output = word + "\t" + str(val)
                docid_termpairs.append(output)
        IOtools.todisc_list(rootpath + os.sep + doc, docid_termpairs)
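For reference, a toy document-term DataFrame of the shape this method expects (names invented):

import pandas as pd

df = pd.DataFrame(
    [[0.0, 0.4], [0.7, 0.0]],
    index=["doc1", "doc2"],     # document ids
    columns=["gezi", "park"],   # vocabulary terms
)
# write_docterm_weight would emit "park\t0.4" for doc1 and "gezi\t0.7" for doc2.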
Example 7: merge_word_lists
# Required import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
def merge_word_lists(indirectory, outdirectory, outfilename):
    fileids = IOtools.getfilenames_of_dir(indirectory, removeextension=False)
    allwords = []
    for fileid in fileids:
        words = IOtools.readtextlines(indirectory + os.sep + fileid)
        allwords.extend(words)
    IOtools.todisc_list(outdirectory + os.sep + outfilename + ".txt", allwords)
    fdist = nltk.FreqDist(allwords)
    IOtools.todisc_freqdist(outdirectory + os.sep + "weighted-" + outfilename + ".txt", fdist)
Example 8: crawl_folds_for_sets
# Required import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
def crawl_folds_for_sets(foldpath, outrootpath):
    folds = IOtools.getfoldernames_of_dir(foldpath)
    trainitems_fname = "trainitems.csv"
    testitems_fname = "testitems.csv"
    items = {trainitems_fname: [], testitems_fname: []}
    for fold in folds:
        p1 = os.path.join(foldpath, fold)
        for fname in items.keys():
            p2 = os.path.join(p1, fname)
            df = IOtools.readcsv(p2, keepindex=True)
            fileids = df.index.values.tolist()
            outpath = os.path.join(outrootpath, "all-" + fname[:-4] + ".txt")   # strip ".csv"
            IOtools.todisc_list(outpath, fileids, mode='a')   # append across folds
            items[fname].extend(fileids)
    return items
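IOtools.readcsv is also not shown here; presumably it is a thin wrapper over pandas in which keepindex=True makes the first CSV column (the instance file ids) the DataFrame index. A sketch under that assumption:

import pandas as pd

def readcsv(path, keepindex=False):
    # Hypothetical sketch: with keepindex, column 0 becomes the index,
    # so df.index.values.tolist() above yields the file ids.
    if keepindex:
        return pd.read_csv(path, index_col=0)
    return pd.read_csv(path)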
Example 9: parseXML_phraseslexicon
# Required import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
def parseXML_phraseslexicon(xmlinfilepath, txtoutfilepath):
    xmlfnames = IOtools.getfilenames_of_dir(xmlinfilepath, removeextension=False)
    statsstr = "letter\tnumofphrases" + "\n"
    for fname in xmlfnames:
        letter = fname.split("_")[-1][:-4]   # each fname is of the form "ADB_<letter>.xml"
        print fname
        path = xmlinfilepath + os.sep + fname
        tree = ET.parse(path)
        lexiconroot = tree.getroot()
        names = lexiconroot.findall(deyimDOMpath)
        phrases = []
        for name in names:
            phrase = name.text
            phrases.append(phrase.strip().lower())
        outpath = txtoutfilepath + os.sep + letter + ".txt"
        IOtools.todisc_list(outpath, phrases)
        statsstr += letter + "\t" + str(len(phrases)) + "\n"
    IOtools.todisc_txt(statsstr, txtoutfilepath + os.sep + "originalstats.txt")
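deyimDOMpath is defined elsewhere; judging from Example 12, which calls root.findall("entry/name"), the ADB files presumably contain entry/name elements. A toy reconstruction (the XML content is invented):

import xml.etree.ElementTree as ET

# Hypothetical ADB_a.xml content, inferred from Example 12's "entry/name" path.
sample = """<lexicon>
  <entry><name>abayi yakmak</name></entry>
  <entry><name>agzi kulaklarina varmak</name></entry>
</lexicon>"""

root = ET.fromstring(sample)
for name in root.findall("entry/name"):
    print(name.text.strip().lower())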
Example 10: intersect_word_lists
# Required import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
def intersect_word_lists(list1, list2, outdir, outfilename):
    intersect = []
    for w1 in list1:
        if w1 in list2:
            intersect.append(w1)
    IOtools.todisc_list(outdir + os.sep + outfilename + ".txt", intersect)
    fdist = nltk.FreqDist(intersect)
    fdist2 = nltk.FreqDist(list2)
    for item in list(fdist2):
        fdist.inc(item, fdist2[item])   # NLTK 2.x API; see the NLTK 3 note below
    IOtools.todisc_freqdist(outdir + os.sep + "weighted-" + outfilename + ".txt", fdist)
    return intersect
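FreqDist.inc is NLTK 2.x API and no longer exists in NLTK 3, where FreqDist subclasses collections.Counter. If you are porting this code, the merging loop above becomes:

import nltk

fdist = nltk.FreqDist(["a", "b", "a"])
fdist2 = nltk.FreqDist(["b", "c"])
fdist.update(fdist2)   # Counter.update adds counts rather than replacing them
print(fdist.most_common())   # [('a', 2), ('b', 2), ('c', 1)]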
Example 11: get_jointlexicon
# Required import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
def get_jointlexicon():
    engtr = texter.readtextlines(tr_translations)
    mpqa = texter.readtextlines(mpqapath)
    newclues = []
    for tr, en in zip(engtr, mpqa):
        # edit the original English clue line from MPQA
        items = en.split()
        items = filter(lambda x: (not x.startswith('len') and not x.startswith('stem') and len(x) > 1), items)
        print en, " ###### ", items
        items = [item.split("=")[1] for item in items]
        enclue = " ".join(items)
        # edit the Turkish translation
        trword = tr.split(":")[1]
        trword = trword.strip().lower()
        s = enclue + "\t" + trword
        newclues.append(s)
    IOtools.todisc_list(jointlexiconpath, newclues)
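For concreteness, here is a single clue line in the standard MPQA subjectivity-lexicon format run through the same filtering as above:

en = "type=weaksubj len=1 word1=abandoned pos1=adj stemmed1=y priorpolarity=negative"
items = en.split()
# Drop the len= and stem*= fields, keep type, word, pos, and polarity.
items = [x for x in items if not x.startswith('len') and not x.startswith('stem') and len(x) > 1]
items = [item.split("=")[1] for item in items]
print(" ".join(items))   # weaksubj abandoned adj negative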
Example 12: yapmak (a module-level script fragment, not a function)
# Required import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
#IOtools.mergetxtfiles(phrasestxtout, phrasestxtout+os.sep+"tr_phrases.txt")
#s = 'dedigini (veya soyledigini) yapmak (veya etmek)'
#s = "(birine) dedigini yapmak (veya soylemek) (kendi)"
s = "dedigini yapmak"
print editphrase(s)

allphrases = IOtools.readtextlines(phrasestxtout + os.sep + "tr_phrases.txt")
refinedphrases = []
for p in allphrases:
    refinedphrases.append(editphrase(p))
IOtools.todisc_list(phrasestxtout + os.sep + "tr_phrasesEDIT.txt", refinedphrases)

'''
fname = "ADB_a.xml"
path = phrasespath + os.sep + fname
tree = ET.parse(path)
root = tree.getroot()
print root.findtext("name")
names = root.findall("entry/name")
for name in names:
    print name.text
'''
Example 13: lemmatize_lexicon
# Required import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
def lemmatize_lexicon(fname):
    words = texter.readtextlines(trpath + os.sep + fname)
    lemmata = SAKsParser.lemmatize_lexicon(words)
    IOtools.todisc_list(trpath + os.sep + "lemmatized_" + fname, lemmata)
Example 14: categorize_lexicon
# Required import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import todisc_list [as alias]
def categorize_lexicon():
    lexicon = texter.readtextlines(jointlexiconpath)
    fname = ""
    for line in lexicon:
        # each line looks like "<type> <word> <pos> <polarity>\t<trword>" (see Example 11)
        items = line.split()
        t = items[0]   # subjectivity type, e.g. strongsubj/weaksubj
        p = items[3]   # prior polarity
        fname = t + "_" + p + ".txt"
        s = items[1] + " " + items[2] + "\t" + items[4] + "\n"
        IOtools.txttodisc_append(s, trpath + os.sep + fname)
if __name__ == "__main__":
    #get_jointlexicon()
    #categorize_lexicon()
    #lemmatize_lexicon("tr_strongsubjective.txt")
    l = texter.readtextlines(trpath + "/tr_strongsubjective.txt")
    l = list(set(l))   # deduplicate, then sort for a stable lexicon file
    l.sort()
    IOtools.todisc_list(trpath + "/tr_strongsubjective_vv.txt", l)