This article collects typical usage examples of the Python class IOtools from the sentimentfinding package. If you are unsure what IOtools does, how to use it, or want to see it in context, the curated class code examples below should help.
Fifteen IOtools code examples are shown, sorted by popularity by default. You can vote for the examples you like or find useful; your feedback helps the system recommend better Python code examples.
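IOtools is a project-internal helper module of sentimentfinding, not a package you can install from PyPI, and its implementation is not shown on this page. If you want to run the snippets below without the original project, the following is a minimal stand-in sketch: the function names and call signatures are taken from how the examples use them, but the bodies are assumptions and may differ from the real module. Other helpers that appear below (todisc_matrix, todisc_freqdist, readtextlines) follow the same pattern and are omitted here.

# Minimal stand-in for sentimentfinding.IOtools, inferred from the calls in the
# examples on this page. The bodies are assumptions, not the real module.
import os, json, codecs
import pandas as pd

def readcsv(path, keepindex=False):
    # keepindex=True is assumed to mean "use the first CSV column as the index"
    return pd.read_csv(path, index_col=0 if keepindex else None)

def tocsv(df, path, keepindex=False):
    df.to_csv(path, index=keepindex)

def todisc_txt(txt, path):
    with codecs.open(path, "w", encoding="utf8") as f:
        f.write(txt)

def todisc_list(path, lst):
    todisc_txt("\n".join(lst), path)

def todisc_json(path, obj, ind=4):
    with codecs.open(path, "w", encoding="utf8") as f:
        json.dump(obj, f, indent=ind)

def getfoldernames_of_dir(path):
    return sorted(d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d)))

def getfilenames_of_dir(path, removeextension=True):
    names = sorted(f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f)))
    return [os.path.splitext(n)[0] for n in names] if removeextension else names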
Example 1: csv2latextable_algorithm
def csv2latextable_algorithm(inpath, outpath, filename, metricname):
    header = "\\begin{table}[h] \n \
    \\begin{center} \n \
    \\begin{tabular}{|p{9cm}|p{2cm}|p{2cm}|p{2cm}|} \n \
    \\hline \\bf algorithm \& parameters & \\bf mean "+ metricname +" & \\bf minimum "+ metricname +" & \\bf maximum "+ metricname +" \\\ \\hline"
    footer = "\\end{tabular} \n \
    \\end{center} \n \
    \\caption{\\label{alg-"+metricname[:4]+"-stats} Mean, maximum and minimum "+metricname+" results for 27 learning models } \n \
    \\end{table}"
    ip1 = os.path.join(inpath, filename+".csv")
    df = IOtools.readcsv(ip1, keepindex=True)
    nrows, ncols = df.shape
    rowids = df.index.values.tolist()
    out = header+"\n"
    for rowid in rowids:
        featset = rowid[4:]
        featset = "\\verb|"+featset+"|"
        out += featset + " & "
        #np.round(a, decimals, out)
        mean = df.loc[rowid, "mean"]
        min = df.loc[rowid, "min"]
        max = df.loc[rowid, "max"]
        stats = map(lambda x : str(round(x, 5)), [mean, min, max])
        statsstr = " & ".join(stats)
        out += statsstr + " \\\ \hline " + "\n"
    out += footer
    IOtools.todisc_txt(out, os.path.join(outpath, filename+".txt"))
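A hypothetical call with placeholder paths and file name (not taken from the original project): the function reads <inpath>/<filename>.csv, which must have the model names as its index and "mean", "min" and "max" columns, and writes the LaTeX table to <outpath>/<filename>.txt.

# placeholder paths and file name; the CSV needs "mean", "min" and "max" columns
csv2latextable_algorithm(inpath="results/stats",
                         outpath="results/latex",
                         filename="algorithm_fscore_stats",
                         metricname="fscore")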
Example 2: csv2latextable_featset
def csv2latextable_featset(inpath, outpath, filename, metricname):
    header = "\\begin{table}[h] \n \
    \\begin{center} \n \
    \\begin{tabular}{|p{5cm}|p{2cm}|p{2cm}|p{2cm}|} \n \
    \\hline \\bf feature-combined dataset name & \\bf mean "+ metricname +" & \\bf minimum "+ metricname +" & \\bf maximum "+ metricname +" \\\ \\hline"
    footer = "\\end{tabular} \n \
    \\end{center} \n \
    \\caption{\\label{featset-"+metricname[:4]+"-stats} Mean, maximum and minimum "+metricname+" results for 8 feature-measure-combined datasets } \n \
    \\end{table}"
    ip1 = os.path.join(inpath, filename+".csv")
    df = IOtools.readcsv(ip1, keepindex=True)
    nrows, ncols = df.shape
    rowids = df.index.values.tolist()
    out = header+"\n"
    for rowid in rowids:
        featset = rowid.split("**")[0].strip()
        featset = "\\verb|"+featset+"|"
        out += featset + " & "
        #np.round(a, decimals, out)
        mean = df.loc[rowid, "mean"]
        min = df.loc[rowid, "min"]
        max = df.loc[rowid, "max"]
        stats = map(lambda x : str(round(x, 5)), [mean, min, max])
        statsstr = " & ".join(stats)
        out += statsstr + " \\\ \hline " + "\n"
    out += footer
    IOtools.todisc_txt(out, os.path.join(outpath, filename+".txt"))
Example 3: buildcorpus
def buildcorpus(nfile, ncat, resourcename, path):
    resourcepath = path + os.sep + resourcename
    catnames = IOtools.getfoldernames_of_dir(resourcepath)[:ncat]
    featurematrix = []
    doctermmatrix = []
    cfdTermDoc = nltk.ConditionalFreqDist()
    for catname in catnames:
        fileids = []
        p = resourcepath + os.sep + catname + os.sep
        fileids.extend(IOtools.getfilenames_of_dir(p, removeextension=False)[:nfile])
        corpus = CorpusFeatures(fileids, resourcename+os.sep+catname, p)
        corpus.getfeatures()
        datapoints = corpus.build_featurematrix()
        for k,v in datapoints.iteritems():
            featurematrix.append([k]+v+[resourcename])
        corpus.plot_features()
        # doc term matrix
        cfd = corpus.build_termmatrix()
        for fileid in cfd.conditions():
            for term in list(cfd[fileid]):
                cfdTermDoc[fileid].inc(term)
    IOtools.todisc_matrix(featurematrix, IOtools.results_rootpath+os.sep+"MATRIX"+str(nfile*ncat)+"texts.txt", mode="a")
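Note that `cfdTermDoc[fileid].inc(term)` is the NLTK 2.x FreqDist API; `inc` was removed in NLTK 3, where the same update is an in-place increment. A minimal equivalent under NLTK 3:

import nltk
cfd = nltk.ConditionalFreqDist()
cfd["doc1"]["word"] += 1   # NLTK 3.x; replaces cfd["doc1"].inc("word") from NLTK 2.x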
Example 4: run_copy_from_gold
def run_copy_from_gold():
    maincsvpath = "/home/dicle/Dropbox/ukp/fallacy_detection/mturk_annotations/annotationdf_worker.csv"
    indf = IOtools.readcsv(maincsvpath)
    sourcecsvpath = "/home/dicle/Dropbox/ukp/fallacy_detection/expertandgoldannotations/gold-labels3.csv"
    sourcedf = IOtools.readcsv(sourcecsvpath)
    outfilepath = "/home/dicle/Dropbox/ukp/fallacy_detection/mturk_annotations/annotationdf_wtexts_wmajority_worker.csv"
    insert_texts(indf, sourcedf, outfilepath)
Example 5: get_allfolds_bigdf
def get_allfolds_bigdf(foldrootpath, annottype, featset, labelunion):
    bigdf = pd.DataFrame(columns=metaexperimentation.performanceheader)
    folds = IOtools.getfoldernames_of_dir(foldrootpath)
    for foldno in folds:
        p1 = os.path.join(foldrootpath, foldno)
        scorecsvfilepath = p1 + os.sep + metaexperimentation.scorefilename+".csv"
        scorecsvfile = IOtools.readcsv(scorecsvfilepath)
        print " scorefile ", scorecsvfilepath, " ", scorecsvfile.shape
        #rankdf = matrixhelpers.get_first_N_rows(scorecsvfile, int(N / 2), metricnames, ascend=takeworst)
        rankdf = scorecsvfile.copy()
        rankdf["labelunion"] = labelunion
        rankdf["featureset"] = featset
        rankdf["annottype"] = annottype
        rankdf["fold"] = foldno
        bigdf = bigdf.append(rankdf)
        #dflist.append(rankdf)
    print "FOLDROOTPATH ", foldrootpath
    outcsvpath = os.path.join(foldrootpath, "bigdf.csv")
    IOtools.tocsv(bigdf, outcsvpath, False)
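`DataFrame.append` matches the pandas versions this project was written against; it has since been removed (pandas 2.0+), so a port of the loop above would use `pd.concat` instead. A minimal self-contained equivalence sketch:

import pandas as pd
a = pd.DataFrame({"score": [0.7]})
b = pd.DataFrame({"score": [0.8]})
big = pd.concat([a, b], ignore_index=True)   # replaces big = a.append(b)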
Example 6: prepare_experiment
def prepare_experiment(self, Xpath, ypath, erootpath, labelnames=None):
    self.datapath = Xpath
    self.labelpath = ypath
    #if erootpath:
    self.set_score_folder(erootpath)
    yvector = IOtools.readcsv(ypath, True)
    self.ylabels = yvector.answer.values
    yvals = self.ylabels.copy().tolist()
    #print "y vals ",yvals
    #print "vect ", self.ylabels
    if labelnames is None:
        labelnames = ["class "+str(i) for i in list(set(yvals))]
    instanceids = yvector.index.values.tolist()
    datadf = IOtools.readcsv(Xpath, keepindex=True)
    datadf = datadf.loc[instanceids, :]
    self.X = datadf.values
    self.X[np.isnan(self.X)] = 0
    self.X[np.isinf(self.X)] = 0
    ''' do it inside models
    if normalize:
        self.X = preprocessing.normalize(self.X, axis=0)
    '''
    ''' can't apply standardization as it results in negative entries in the matrix,
    '''
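The two masked assignments that zero out NaN and infinite entries can also be done in one call with NumPy 1.17 or newer; a small self-contained sketch:

import numpy as np
X = np.array([[1.0, np.nan], [np.inf, -np.inf]])
X = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0)   # -> [[1., 0.], [0., 0.]]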
Example 7: getwordsandlemmasfromfile
def getwordsandlemmasfromfile():
    rootpath = "/home/dicle/Dicle/Tez/geziyakurdiproject/"
    corpuspath = rootpath + os.sep + "texts/"
    outwordspath = rootpath + os.sep + "weightedwords/"
    fileids = IOtools.getfilenames_of_dir(corpuspath, removeextension=False)
    for fileid in fileids:
        txt = texter.readtxtfile(corpuspath+os.sep+fileid)
        marker = "Haziran 2013"
        mark = txt.find(marker)  # skip metadata
        txt = txt[mark+len(marker):]
        words = texter.getwords(txt)
        lemmatuples = SAKsParser.findrootsinlexicon(words)
        roots = [root for _,root,_ in lemmatuples]
        fdwords = nltk.FreqDist(words)
        fdroots = nltk.FreqDist(roots)
        weightedwords = [word+"\t"+str(fdwords[word]) for word in list(fdwords)]
        weightedroots = [root+"\t"+str(fdroots[root]) for root in list(fdroots)]
        IOtools.todisc_list(outwordspath+os.sep+"lemma"+os.sep+fileid, weightedwords)
        IOtools.todisc_list(outwordspath+os.sep+"root"+os.sep+fileid, weightedroots)
Example 8: report_results
def report_results(self):
    self.compute_precision()
    self.compute_recall()
    self.compute_fmeasure()
    self.compute_accuracy()
    IOtools.todisc_matrix(self.confusionmatrix, self.folder+os.sep+self.experimentname+".confmat")
    f = codecs.open(self.folder+os.sep+self.experimentname+".results", "a", encoding='utf8')
    # write report as list not to keep the whole string in memory
    header = "\t" + "\t".join(self.catmetrics.keys()) +"\n"
    f.write(header)
    labelencoding, _ = classfhelpers.classlabelindicing(self.classes)  # labeldecoding contains indices
    for c in self.classes:
        i = labelencoding[c]
        line = []
        line.append(c)
        for metricname in self.catmetrics.keys():
            line.append(self.catmetrics[metricname][i])
        line = map(lambda x : str(x), line)
        outstr = "\t".join(line) + "\n"
        f.write(outstr)
    f.write("\nAccuracy: "+str(self.accuracy))
    f.close()
Example 9: get_user_text_distributions
def get_user_text_distributions(self):
    #users = range(1, self.ncoders+1)
    # 1- get single-annotation list
    # 2- get double-annotation list
    usertextassignment = {}
    singleannot_distribution = Selection()
    singleannot_distribution.initialize(self.months, self.resources, self.cats)
    for i,user in enumerate(self.coders):
        oneuser_distribution, assignment = self.justice_selection(self.nsingle)  # will return textids as (newsid-res-cat) # handle selected_texts here
        usertextassignment[i] = assignment
        singleannot_distribution.update_selection(oneuser_distribution)
    # record user assignments and distribution
    #self.singles_jsonpath = os.path.join(self.outfolder, "singleannotation_assignments.txt")
    IOtools.todisc_json(self.singles_jsonpath, usertextassignment, ind=5)
    singleannot_distribution.todisc(os.path.join(self.outfolder, "singleannotation_distribution.txt"))
    textassignments = {}
    # BUG: the number of double-annotatable texts is wrong here; it should be (self.ncoders/2)*self.noverlaps.
    #doubleannot_distribution, textassignments = self.justice_selection(self.ncoders * self.noverlaps)
    doubleannot_distribution, textassignments = self.justice_selection(int(self.ncoders / 2.0) * self.noverlaps)
    #self.doubles_jsonpath = os.path.join(self.outfolder, "doubleannotation_assignments.txt")
    IOtools.todisc_json(self.doubles_jsonpath, textassignments)
    doubleannot_distribution.todisc(os.path.join(self.outfolder, "doubleannotation_distribution.txt"))
Example 10: diff_word_lists
def diff_word_lists(list1, list2, outdir, outfilename):
    l = list(set(list1) - set(list2))
    IOtools.todisc_list(outdir+os.sep+outfilename+".txt", l)
    fdist = nltk.FreqDist(l)
    IOtools.todisc_freqdist(outdir+os.sep+"weighted-"+outfilename+".txt", fdist)
    return l
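A hypothetical call with placeholder lists and output names. Because the difference is computed over sets, each surviving word occurs exactly once, so the frequency-weighted file written by todisc_freqdist assigns weight 1 to every entry.

# placeholder inputs; writes wordlists/doc_only.txt and wordlists/weighted-doc_only.txt
docwords = ["gezi", "park", "polis", "gaz", "park"]
stopwords = ["ve", "bir", "bu"]
only_in_doc = diff_word_lists(docwords, stopwords, outdir="wordlists", outfilename="doc_only")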
Example 11: evaluate_crosscorpus
def evaluate_crosscorpus(scoresroot):
    featclasses = IOtools.getfoldernames_of_dir(scoresroot)
    for featureclass in featclasses:
        p1 = os.path.join(scoresroot, featureclass)
        lunions = IOtools.getfoldernames_of_dir(p1)
        for labelunion in lunions:
            p2 = os.path.join(p1, labelunion)
            testcases = IOtools.getfoldernames_of_dir(p2)
            for testcase in testcases:
                p3 = os.path.join(p2, testcase)
                traincases = IOtools.getfoldernames_of_dir(p3)
                for traincase in traincases:
                    p4 = os.path.join(p3, traincase)  # foldspath
                    get_allfolds_bigdf(foldrootpath=p4,
                                       annottype=testcase + " ** " + traincase,
                                       featset=featureclass,
                                       labelunion=labelunion)
                    get_fold_averages(p4)
Example 12: get_randomly_annotated_set
def get_randomly_annotated_set(incsvfilename, outcsvfilename,
                               incsvfolder=metacorpus.userannotatedpath, outcsvfolder=metacorpus.randomannotatedpath,
                               randomchoicevalues=metacorpus.subjectivity_label_values.keys()):
    df = IOtools.readcsv(os.path.join(incsvfolder, incsvfilename))  # df cols: questionname,userid,answer
    randomdf = df.copy()
    numofrows, _ = randomdf.values.shape
    subjvalues = randomchoicevalues
    randomanswers = [random.choice(subjvalues) for _ in range(numofrows)]
    randomdf.loc[:, "answer"] = randomanswers
    # extra: assign 5 of the rows the value 5 for the answer 'no idea, ambiguous'
    notknowingrows = random.sample(range(numofrows), 5)
    '''
    for _ in range(5):
        randindex = random.randint(0, numofrows-1)
        while randindex in notknowingrows:
            randindex = random.randint(0, numofrows-1)
        notknowingrows.append(randindex)
    '''
    #notknowingrows = [random.randint(0, numofrows-1) for _ in range(5)]  # be careful with this 5 number, it is subject to change for the sake of statistical validity
    randomdf.loc[notknowingrows, "answer"] = 5
    IOtools.tocsv(randomdf, os.path.join(outcsvfolder, outcsvfilename))
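Under Python 2, `metacorpus.subjectivity_label_values.keys()` is a plain list; under Python 3 it is a view object that `random.choice` cannot index, so a port would wrap it in `list(...)`. A minimal illustration of the difference:

import random
labels = {1: "subjective", 2: "objective"}
# random.choice(labels.keys())              # TypeError on Python 3: dict_keys is not subscriptable
value = random.choice(list(labels.keys()))  # works on both Python 2 and 3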
Example 13: metadata_tabular
def metadata_tabular():
    rpath = "/home/dicle/Dicle/Tez/geziyakurdiproject/corpus2/ldatests22Temmuz/wordletest/words/temp/"
    metadf = pd.read_csv(rpath+"/metadocs.csv", index_col=None, sep="\t")
    print metadf.loc[0, "Author"]
    metadf = metadf.sort(["Polarity", "Date", "Author"], ascending=[False, True, True])
    v = metadf.iloc[0, :]
    print v.loc["Author"], v.loc["Resource"]
    header = "\\begin{tabular}{l | c | c | c | c } \n \
    kategori & yazar & başlık & tarih & yayın \\\\ \n \
    \\hline \\hline \n"
    end = "\\end{tabular}"
    outltx = ""
    numofdocs, fields = metadf.shape
    for i in range(numofdocs):
        row = metadf.iloc[i, :]
        cat = row.loc["Polarity"]
        cat = "\\textbf{"+cat+"}"
        author = row.loc["Author"]
        title = row.loc["Title"]
        link = row.loc["Link"]
        date = row.loc["Date"]
        resource = row.loc["Resource"]
        title = "\\href{"+link+"}{"+title+"}"
        date = "\\textit{"+date+"}"
        resource = "@"+resource
        s = " & ".join([cat, author, title, date, resource])
        outltx = outltx + s + "\\\\ \n \\hline \n"
    outltx = header + outltx + end
    IOtools.todisc_txt(outltx, rpath+"docswordle_tableLaTeX.txt")
Example 14: add_resource_label
def add_resource_label(matrixpath, datasetname, replacelabel=False, headers=True):
    matrixlines = IOtools.readtextlines(matrixpath)  # 1st item = fileid, last item = file category
    newmatrix = []
    if headers:
        matrixlines = matrixlines[2:]
    for instance in matrixlines:
        items = instance.split()
        fileid = items[0]
        print instance,
        path = datapath+os.sep+datasetname
        foldernames = IOtools.getfoldernames_of_dir(datapath+os.sep+datasetname)
        #print foldernames
        for folder in foldernames:
            allfileids = IOtools.getfilenames_of_dir(path+os.sep+folder, removeextension=False)
            #print allfileids
            if fileid in allfileids:
                newspath = path+os.sep+folder+os.sep+fileid
                resourcename = texter.getnewsmetadata(newspath, ["resource"])["resource"]
                #print "## ", resourcename, " ", type(instance), " ~~ ", instance
                if replacelabel: items = items[:-1]
                newmatrix.append(items + [resourcename])
                break
    return newmatrix
Example 15: get_AllObj_AllSubj_class
def get_AllObj_AllSubj_class(originallabelspath, outfolder, in_NC=5):
    out_NC = 2
    if in_NC <= out_NC:
        return
    labeldf = IOtools.readcsv(originallabelspath, keepindex=True)
    outpath = os.path.join(ensure_unionclass_dir(outfolder, "ALLobj-ALLsubj", out_NC), metacorpus.labelsfilename + ".csv")
    labelvector = labeldf.values
    labelvector = np.array(labelvector, dtype=object)
    # replace values 12 -> "sub"; 34 -> "obj"
    labelvector[labelvector == 1] = 12
    labelvector[labelvector == 2] = 12
    labelvector[labelvector == 3] = 34
    labelvector[labelvector == 4] = 34
    for i, _ in enumerate(labelvector):
        if labelvector[i] == 5:
            labelvector[i] = random.choice([12, 34])
    twolabeldf = pd.DataFrame(labelvector, columns=labeldf.columns.values.tolist(), index=labeldf.index.values.tolist())
    IOtools.tocsv(twolabeldf, outpath, keepindex=True)
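The element-wise loop that maps the remaining label-5 entries to a random choice of 12 or 34 can also be written with a boolean mask; a small sketch assuming the same 12/34 encoding:

import numpy as np
labelvector = np.array([[12], [5], [34], [5]], dtype=object)
mask = (labelvector == 5)
labelvector[mask] = np.random.choice([12, 34], size=int(mask.sum()))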