This page collects typical usage examples of the Python method sentimentfinding.IOtools.tocsv_lst. If you have been wondering what IOtools.tocsv_lst does, how to call it, or where to find working examples of it, the curated code samples below may help. You can also explore the enclosing module sentimentfinding.IOtools for further context.
Five code examples of the IOtools.tocsv_lst method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
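The IOtools source itself is not reproduced on this page, but every call site below passes a list of row tuples followed by an output path, with the first tuple serving as the header row. A minimal stand-in consistent with that pattern (the real implementation may differ in delimiter or encoding handling) could look like:

import csv

def tocsv_lst(rows, csvpath):
    # write a list of row tuples to disk as CSV;
    # in the examples below the first tuple is the header row
    with open(csvpath, "wb") as f:  # Python 2; on Python 3 use open(csvpath, "w", newline="")
        csv.writer(f).writerows(rows)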
Example 1: generate_user_table
# Required module import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import tocsv_lst [as alias]
def generate_user_table(self, tablefolder, userlist=None):
    if userlist is None:
        userlist = self.coders
    usertable = [("uid", "uname", "lastevaluationid")]
    for i, u in enumerate(userlist):
        usertable.append((i, u, -1))
    tpath = os.path.join(tablefolder, "users.csv")
    IOtools.tocsv_lst(usertable, tpath)
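For illustration, if self.coders were ["ann", "bob"] (hypothetical names), calling generate_user_table(tablefolder) would presumably write a users.csv along these lines:

uid,uname,lastevaluationid
0,ann,-1
1,bob,-1

where -1 appears to be a sentinel meaning the coder has no last evaluation yet.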
Example 2: generate_evalutation_table
# Required module import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import tocsv_lst [as alias]
def generate_evalutation_table(self, tablefolder, usertextpairs):
    evaluationtable = [("eid", "userid", "questionname", "answer", "isanswered", "qorder")]
    userids = [userid for userid, _ in usertextpairs]
    uqorderdct = initialize_dict(userids)  # per-user question-order counter
    for i, (uid, qname) in enumerate(usertextpairs):
        qorder = uqorderdct[uid]
        # answer=-100 with isanswered=0 appears to mark a not-yet-answered question
        evaluationtable.append((i, uid, qname, -100, 0, qorder))
        uqorderdct[uid] = uqorderdct[uid] + 1
    tpath = os.path.join(tablefolder, "evaluations.csv")
    IOtools.tocsv_lst(evaluationtable, tpath)
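initialize_dict is not shown on this page; its use here (and as listutils.initialize_dict with a val keyword in Example 4) suggests it simply maps every key to one starting value, roughly:

def initialize_dict(keys, val=0):
    # map each key to the same initial value, so the first
    # question each user sees gets qorder 0
    return dict((k, val) for k in keys)

Note that calling such a helper with val=[] (as the commented-out code in Example 4 does) would share one list object across all keys unless the implementation copies the value per key.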
Example 3: generate_question_table
# Required module import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import tocsv_lst [as alias]
def generate_question_table(self, tablefolder, textindices):
    df = IOtools.readcsv(self.corpuspath)
    questiontable = [("qid", "qname", "qtitle", "qcontent")]
    for i, textindex in enumerate(textindices):
        resourcename = df.loc[textindex, "resource"]
        orgcatname = df.loc[textindex, "originalcatname"]
        catname = df.loc[textindex, "category"]
        textid = df.loc[textindex, "newsid"]
        # pandas may read numeric ids as floats (e.g. "12345.0"); keep only the integer part
        textid = str(textid).split(".")[0]
        print textindex, type(textindex), textid, type(textid)
        questionname = resourcename + "-" + catname + "-" + textid
        filepath = os.path.join(metacorpus.rawcorpuspath, resourcename, orgcatname, textid + metacorpus.itemext)
        title, content = extractnewsmetadata.get_news_article(filepath)
        questiontable.append((i, questionname, title, content))
    tpath = os.path.join(tablefolder, "questions.csv")
    IOtools.tocsv_lst(questiontable, tpath)
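Each question name concatenates resource, category, and news id, so a hypothetical row with resource "radikal", category "politics", and newsid 5012 would yield the name radikal-politics-5012. Note that the on-disk path is built from orgcatname (the original category) while the question name uses the mapped catname, so the two category fields can legitimately differ.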
Example 4: conduct_cross_validation_notest
# Required module import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import tocsv_lst [as alias]
#......... part of the code is omitted here .........
    nc = int(nc)
    sp4 = IOtools.ensure_dir(os.path.join(sp3, unionname))
    ylabelspath = os.path.join(lp2, metacorpus.labelsfilename + ".csv")
    y = IOtools.readcsv(ylabelspath, True)
    labelitems = y.groupby("answer").groups   # labelitems = {label : [newsid]}
    countlabels = listutils.initialize_dict(labelitems.keys(), val=0.0)
    ''' skip test division
    # record test instances for guaranteeing
    testinstances = listutils.initialize_dict(labelitems.keys(), val=[])
    traininstances = listutils.initialize_dict(labelitems.keys(), val=[])
    for label, instanceids in labelitems.iteritems():
        ntest = utils.get_nsplit(len(instanceids), metaexperimentation.testpercentage)
        testinstances[label] = instanceids[-ntest:]
        traininstances[label] = instanceids[:-ntest]
    IOtools.todisc_json(os.path.join(sp4, "test_instances"), testinstances)
    IOtools.todisc_json(os.path.join(sp4, "train_instances"), traininstances)
    '''
    checktrs = []
    checktss = []
    intersectstr = []
    intersectsts = []
    validstart = 0
    for foldno in range(k):
        # both will contain (fileid, label)
        trainitems = []
        testitems = []
        for label, fileids in labelitems.iteritems():
            nvalid = utils.get_nsplit(len(fileids), metaexperimentation.validationpercentage)
            #ntest = utils.get_nsplit(len(fileids), metaexperimentation.testpercentage)
            '''
            print " LABEL: ", label
            print " nvalid: ", nvalid, " ntest: ", ntest
            '''
            instanceids = fileids  # fileids[:-ntest]
            # circular validation window: advance by nvalid+1 each fold, wrapping around
            validstart = (foldno * (nvalid + 1)) % len(fileids)
            validfinish = (validstart + nvalid) % len(fileids)
            trainids = utils.gettrainset(instanceids, validstart, validfinish)  # fileids to be included in the train set
            testids = utils.gettestset(instanceids, validstart, validfinish)   # fileids to be included in the test set
            trainitems.extend([(fileid, label) for fileid in trainids])
            testitems.extend([(fileid, label) for fileid in testids])
            '''
            print " ntrain: ", len(trainids)
            print " ntestset: ", len(testids)
            if len(trainids) <= len(testids):
                print "******* ", foldno, labelunion, label
            '''
            # check file collision. completed and closed 12:43
            '''
            coltr = listutils.getintersectionoflists(checktrs, trainids)
            colts = listutils.getintersectionoflists(checktss, testids)
            intersectstr.extend(coltr)
            intersectsts.extend(colts)
            '''
            '''
            print i, " ----- ",
            print " intersect-train: ", intersectstr, " ** intersect-test: ", intersectsts
            print
            '''
        foldpath = IOtools.ensure_dir(os.path.join(sp4, "fold-" + str(foldno)))
        metaexperimentation.initialize_score_file(foldpath)
        IOtools.tocsv_lst(trainitems, os.path.join(foldpath, "trainitems.csv"))
        IOtools.tocsv_lst(testitems, os.path.join(foldpath, "testitems.csv"))
        Xtrain, ytrain = utils.tuple2matrix(trainitems, Xpath)
        Xtest, ytest = utils.tuple2matrix(testitems, Xpath)
        # classify
        for model in models:
            model.set_score_folder(foldpath)
            model.apply_algorithms2(Xtrain, ytrain, Xtest, ytest)
        # random and frequency classification for baseline comparison
        experimentname = "random"
        distinctlabels = list(set(ytest))
        ypred = [random.choice(distinctlabels) for _ in range(len(ytest))]
        models[0].reportresults(ytest, ypred, experimentname)
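utils.gettrainset and utils.gettestset are not shown on this page; from the modular arithmetic above they appear to carve a circular validation window out of the id list. A hypothetical pair consistent with that usage:

def gettestset(instanceids, start, finish):
    # take the (possibly wrapping) slice [start:finish) as the validation fold
    if start <= finish:
        return instanceids[start:finish]
    return instanceids[start:] + instanceids[:finish]

def gettrainset(instanceids, start, finish):
    # everything outside the validation window is used for training
    testids = set(gettestset(instanceids, start, finish))
    return [fid for fid in instanceids if fid not in testids]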
Example 5: find_word_matrices
# Required module import: from sentimentfinding import IOtools [as alias]
# Or: from sentimentfinding.IOtools import tocsv_lst [as alias]
def find_word_matrices(self, newsidlist, processcontent=True, prepend="content"):
    dateroots = []
    datePOStag = []
    titleexclamation = [("newsid", "title_exclamation")]
    textPOStag = []
    textroots = []
    textrootsWpostag = []
    textliterals = []
    print prepend, " processing:"
    for newsid in newsidlist:
        print "newsid ", newsid
        filepath = extractnewsmetadata.newsid_to_filepath(newsid)
        content, title, date = extractnewsmetadata.get_news_article2(filepath)
        text = ""
        if processcontent:
            text = content
        else:
            text = title
        if "!" in title:
            titleexclamation.append((newsid, 1))
        else:
            titleexclamation.append((newsid, 0))
        words = texter.getwords(text)
        lemmata = SAKsParser.lemmatize_lexicon(words)
        for (literal, literalPOS, root, rootPOS) in lemmata:
            root = texter.cleanword(root)
            # 'and' rather than the original 'or': with 'or' the test is always
            # true, so empty or whitespace-only roots would slip through
            if (len(root) > 0) and (not root.isspace()):
                #print root,
                textPOStag.append((newsid, literalPOS))
                textroots.append((newsid, root))
                textrootsWpostag.append((newsid, root + " Wpostag " + rootPOS))
                textliterals.append((newsid, literal + " Wpostag " + literalPOS))
                dateroots.append((date, root))
                datePOStag.append((date, literalPOS))
    cfd_dateroots = ConditionalFreqDist(dateroots)
    cfd_datepostag = ConditionalFreqDist(datePOStag)
    cfd_textpostag = ConditionalFreqDist(textPOStag)
    cfd_textroots = ConditionalFreqDist(textroots)
    cfd_textrootWpostag = ConditionalFreqDist(textrootsWpostag)
    cfd_textliterals = ConditionalFreqDist(textliterals)
    print "some id's", cfd_textroots.conditions()
    # invert the newsid -> root distribution into root -> newsid
    cfd_roottext = ConditionalFreqDist((word, docid) for docid in cfd_textroots.conditions()
                                                     for word in list(cfd_textroots[docid]))
    # write each cfd to csv with the condition items as columns (fix later):
    csvpath = os.path.join(self.matrixpath, prepend + "-dateroot.csv")
    CFDhelpers.cfd_to_matrix(cfd_dateroots, csvpath)
    csvpath = os.path.join(self.matrixpath, prepend + "-datepostag.csv")
    CFDhelpers.cfd_to_matrix(cfd_datepostag, csvpath)
    csvpath = os.path.join(self.matrixpath, prepend + "-postagCOUNT.csv")
    CFDhelpers.cfd_to_matrix(cfd_textpostag, csvpath)
    termcountcsvpath = os.path.join(self.matrixpath, prepend + "termCOUNT.csv")
    CFDhelpers.cfd_to_matrix(cfd_textroots, termcountcsvpath)
    tfidfcsvpath = os.path.join(self.matrixpath, prepend + "termTFIDF.csv")
    texter.compute_tfidf_ondisc(termcountcsvpath, tfidfcsvpath)
    csvpath = os.path.join(self.matrixpath, prepend + "-rootcountindex.csv")
    CFDhelpers.cfd_to_matrix(cfd_roottext, csvpath)
    csvpath = os.path.join(self.matrixpath, prepend + "rootWpostagCOUNT.csv")
    CFDhelpers.cfd_to_matrix(cfd_textrootWpostag, csvpath)
    csvpath = os.path.join(self.matrixpath, prepend + "literalWpostagCOUNT.csv")
    CFDhelpers.cfd_to_matrix(cfd_textliterals, csvpath)
    # continue with the other csv's -- 6 Feb, 05:42
    if not processcontent:
        print "keep exclamation !"
        IOtools.tocsv_lst(titleexclamation, os.path.join(self.matrixpath, prepend + "-exclamation.csv"))
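CFDhelpers.cfd_to_matrix is not listed on this page. Given NLTK's ConditionalFreqDist interface (conditions() plus a FreqDist per condition), a plausible pandas-based equivalent is sketched below; the helper's real row/column orientation may differ:

import pandas as pd

def cfd_to_matrix(cfd, csvpath):
    # one column per condition (e.g. newsid), one row per sample (e.g. root);
    # each cell holds the frequency of that sample under that condition
    table = dict((cond, pd.Series(dict(cfd[cond]))) for cond in cfd.conditions())
    pd.DataFrame(table).fillna(0).to_csv(csvpath)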