本文整理汇总了Python中sentimentfinding.IOtools.readcsv方法的典型用法代码示例。如果您正苦于以下问题：Python IOtools.readcsv方法的具体用法？Python IOtools.readcsv怎么用？Python IOtools.readcsv使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sentimentfinding.IOtools的用法示例。
在下文中一共展示了IOtools.readcsv方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: run_copy_from_gold
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readcsv [as 别名]
def run_copy_from_gold():
    """Feed the worker annotations and the gold labels to ``insert_texts``.

    Both CSV files are read from fixed locations and handed, together with a
    fixed output path, to ``insert_texts`` (defined elsewhere in the project).
    """
    worker_csv = "/home/dicle/Dropbox/ukp/fallacy_detection/mturk_annotations/annotationdf_worker.csv"
    gold_csv = "/home/dicle/Dropbox/ukp/fallacy_detection/expertandgoldannotations/gold-labels3.csv"
    result_csv = "/home/dicle/Dropbox/ukp/fallacy_detection/mturk_annotations/annotationdf_wtexts_wmajority_worker.csv"

    # Read order is kept: worker annotations first, gold labels second.
    worker_df = IOtools.readcsv(worker_csv)
    gold_df = IOtools.readcsv(gold_csv)

    insert_texts(worker_df, gold_df, result_csv)
示例2: prepare_experiment
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readcsv [as 别名]
def prepare_experiment(self, Xpath, ypath, erootpath, labelnames=None):
# Load the labels from `ypath` and the matching rows of the feature matrix
# from `Xpath` into self.ylabels / self.X, after registering the score folder.
# NOTE(review): this listing lost its indentation and is cut off after the
# commented-out blocks below, so later steps of the method are not visible.
self.datapath = Xpath
self.labelpath = ypath
#if erootpath:
self.set_score_folder(erootpath)
# Second positional argument is presumably `keepindex` (it is passed by
# keyword for the feature matrix below) -- TODO confirm against IOtools.
yvector = IOtools.readcsv(ypath, True)
# Labels come from the "answer" column of the label file.
self.ylabels = yvector.answer.values
yvals = self.ylabels.copy().tolist()
#print "y vals ",yvals
#print "vect ", self.ylabels
# Default label names are "class <value>" for every distinct label value;
# the order follows set iteration order and is therefore not guaranteed.
if labelnames is None:
labelnames = ["class "+str(i) for i in list(set(yvals))]
# Keep only the feature rows whose index appears in the label file, in the
# label file's order.
instanceids = yvector.index.values.tolist()
datadf = IOtools.readcsv(Xpath, keepindex=True)
datadf = datadf.loc[instanceids, :]
self.X = datadf.values
# NaN and +/-inf cells are zeroed in place.
self.X[np.isnan(self.X)] = 0
self.X[np.isinf(self.X)] = 0
''' do it inside models
if normalize:
self.X = preprocessing.normalize(self.X, axis=0)
'''
''' can't apply standardization as it results in negative entries in the matrix,
示例3: get_2_classes
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readcsv [as 别名]
def get_2_classes(labelrootpath, taggertype, in_NC=5):
    """Derive a 2-class label file from an ``in_NC``-class label file.

    Reads ``<labelrootpath>/NC<intrafeatsep><in_NC>/<taggertype>.csv`` and
    writes the recoded labels as ``<taggertype>.csv`` into the directory
    returned by ``ensure_nclass_dir(labelrootpath, 2)``.

    Recoding: 1 and 2 become 12, 3 and 4 become 34, and every 5 is replaced
    by a random pick between 12 and 34.  (The original note reads
    ``12->"sub"; 34->"obj"``, i.e. the codes presumably stand for the merged
    subjective / objective classes.)

    Args:
        labelrootpath: root folder holding the per-class-count label folders.
        taggertype: base name (without ``.csv``) of the label file.
        in_NC: number of classes of the input labelling.

    Returns:
        None.  Nothing is read or written when ``in_NC <= 2``.
    """
    out_NC = 2
    if in_NC <= out_NC:
        return

    originallabelspath = os.path.join(
        labelrootpath,
        "NC" + metaexperimentation.intrafeatsep + str(in_NC),
        taggertype + ".csv")
    labeldf = IOtools.readcsv(originallabelspath, keepindex=True)
    outlabelspath = os.path.join(ensure_nclass_dir(labelrootpath, out_NC), taggertype + ".csv")

    # Object dtype keeps the cells as plain Python values while recoding.
    labelvector = np.array(labeldf.values, dtype=object)
    labelvector[labelvector == 1] = 12
    labelvector[labelvector == 2] = 12
    labelvector[labelvector == 3] = 34
    labelvector[labelvector == 4] = 34

    # Resolve every ambiguous cell (5) independently.  An element-wise mask
    # is used instead of a per-row test so that label files with more than
    # one column work too; cells are visited in row-major order, so for a
    # single column the random draws happen in the same order as before.
    ambiguous = labelvector == 5
    n_ambiguous = int(np.count_nonzero(ambiguous))
    if n_ambiguous:
        labelvector[ambiguous] = [random.choice([12, 34]) for _ in range(n_ambiguous)]

    twolabeldf = pd.DataFrame(labelvector,
                              columns=labeldf.columns.values.tolist(),
                              index=labeldf.index.values.tolist())
    IOtools.tocsv(twolabeldf, outlabelspath, keepindex=True)
示例4: exclude_one_feature
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readcsv [as 别名]
def exclude_one_feature(self):
    """Write one combined feature matrix per exclusion (ablation) setting.

    ``utils.get_excluded_features_map()`` supplies a nested mapping
    ``exclusionname -> featuregroup -> combcode -> row``.  Each ``row`` holds
    one feature number per feature group, the groups being the keys of
    ``self.featuremap`` in sorted order; a negative number means that group
    contributes nothing.  The matrices of the selected extractors are joined
    column-wise and stored as
    ``<combinedfeaturesfolder>/<exclusionname>/<featuregroup>/<combcode>.csv``.
    """
    exclusionmap = utils.get_excluded_features_map()
    # The group order does not change during the run: sort the keys once
    # instead of once per matrix cell.
    groupnames = sorted(self.featuremap.keys())

    for exclusionname, featuremap in exclusionmap.items():
        p1 = IOtools.ensure_dir(os.path.join(self.combinedfeaturesfolder, exclusionname))
        for featuregroup, combcodemap in featuremap.items():
            p2 = IOtools.ensure_dir(os.path.join(p1, featuregroup))
            for combcode, row in combcodemap.items():
                featuredflist = []
                for j, featno in enumerate(row):
                    print("%s   %s  featno=  %s" % (combcode[:8], row, featno))
                    if featno < 0:
                        # this feature group is excluded from the combination
                        continue
                    groupname = groupnames[j]
                    print(" ->  %s" % (groupname,))
                    extractorinstance = self.featuremap[groupname][featno]
                    featurematrixpath = extractorinstance.getfeaturematrixpath
                    featuredflist.append(IOtools.readcsv(featurematrixpath, keepindex=True))

                # verify_integrity is deliberately left off: the abs and subj
                # word lists overlap, so duplicate column names are expected.
                datamatrix = pd.concat(featuredflist, axis=1)
                datamatrixpath = os.path.join(p2, combcode + ".csv")
                IOtools.tocsv(datamatrix, datamatrixpath, keepindex=True)
示例5: combine_features
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readcsv [as 别名]
def combine_features(self, combmatrix):
    """Build and store one combined feature matrix per row of ``combmatrix``.

    Each row holds one feature number per feature group (the keys of
    ``self.featuremap`` in sorted order).  For row ``i`` the matrices of the
    selected extractors are joined column-wise and written to
    ``<combinedfeaturesfolder>/comb<i>_F_<group>-<feat>....csv``; a text file
    with the decoded combination name is stored next to the dataset under
    ``metacorpus.decodedcombnamesfoldername``.

    Args:
        combmatrix: 2-D iterable of feature numbers (rows = combinations,
            columns = feature groups).
    """
    # The group order is fixed, so the sorted key list is computed once.
    groupnames = sorted(self.featuremap.keys())

    for i, row in enumerate(combmatrix):
        suffixes = []
        featuredflist = []
        for j, featno in enumerate(row):
            # file name pattern: combNO_F_GROUPNO-FEATNO_GROUPNO-FEATNO...
            suffixes.append("_" + str(j) + "-" + str(featno))
            extractorinstance = self.featuremap[groupnames[j]][featno]
            featurematrixpath = extractorinstance.getfeaturematrixpath
            featuredflist.append(IOtools.readcsv(featurematrixpath, keepindex=True))
        filename = "comb" + str(i) + "_F" + "".join(suffixes)

        print(filename)
        print(utils.decode_combcode(filename, self.featuremap))

        # verify_integrity is deliberately left off: the abs and subj word
        # lists overlap, so duplicate column names are expected.  NaN / inf
        # cells are not cleaned here; that is left to the learning step.
        datamatrix = pd.concat(featuredflist, axis=1)
        datamatrixpath = self.combinedfeaturesfolder + os.sep + filename + ".csv"
        IOtools.tocsv(datamatrix, datamatrixpath, keepindex=True)

        # record comb name decoding
        decodednamesfolder = IOtools.ensure_dir(
            os.path.join(self.datasetrootpath, metacorpus.decodedcombnamesfoldername))
        decodedname = utils.tostr_decoded_combcode(filename, self.featuremap)
        IOtools.todisc_txt(decodedname, os.path.join(decodednamesfolder, filename + ".txt"))
示例6: assign_annotator_aggreement
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readcsv [as 别名]
def assign_annotator_aggreement(doubleannotated_path, doubleannot_filename):
    """Split a doubly annotated CSV into full- and half-agreement label files.

    The input file must provide the columns ``answer1`` and ``answer2``.  Two
    frames with the columns ``questionname`` and ``answer`` are derived:

    * full agreement (4 classes): ``answer`` is the shared label when both
      annotators gave the same one;
    * half agreement (2 classes): ``answer`` is 12 when both labels are in
      {1, 2} and 34 when both are in {3, 4}.

    Rows whose ``answer`` is not greater than 0 are dropped, and the frames
    are written to ``doubleannotated_fullagr4class.csv`` and
    ``doubleannotated_halfagr2class.csv`` inside ``doubleannotated_path``.
    """
    annotations = IOtools.readcsv(os.path.join(doubleannotated_path, doubleannot_filename))
    row_count = annotations.shape[0]

    full_agreement = annotations.loc[:, ["questionname", "answer"]].copy()
    half_agreement = annotations.loc[:, ["questionname", "answer"]].copy()

    def _write_agreed(frame, outname):
        # filtrate non-agreeing rows, then store what is left
        agreed = frame[frame["answer"] > 0]
        IOtools.tocsv(agreed, os.path.join(doubleannotated_path, outname))

    # Rows are addressed by label 0..n-1, as in the source file's row order.
    for rowno in range(row_count):
        first = annotations.loc[rowno, "answer1"]
        second = annotations.loc[rowno, "answer2"]

        if first == second:
            full_agreement.loc[rowno, "answer"] = first

        # The coarse 2-class test is independent of the exact match above.
        if first in [1, 2] and second in [1, 2]:
            half_agreement.loc[rowno, "answer"] = 12
        elif first in [3, 4] and second in [3, 4]:
            half_agreement.loc[rowno, "answer"] = 34

    _write_agreed(full_agreement, "doubleannotated_fullagr4class.csv")
    _write_agreed(half_agreement, "doubleannotated_halfagr2class.csv")
示例7: get_randomly_annotated_set
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readcsv [as 别名]
def get_randomly_annotated_set(incsvfilename, outcsvfilename,
                               incsvfolder=metacorpus.userannotatedpath, outcsvfolder=metacorpus.randomannotatedpath,
                               randomchoicevalues=metacorpus.subjectivity_label_values.keys()):
    """Write a copy of an annotation file whose answers are random.

    Every row of the input CSV (columns: questionname, userid, answer) gets an
    ``answer`` drawn uniformly from ``randomchoicevalues``.  Afterwards up to
    five distinct rows are overwritten with the value 5 ('no idea,
    ambiguous').

    Args:
        incsvfilename: name of the annotation file inside ``incsvfolder``.
        outcsvfilename: name of the file to create inside ``outcsvfolder``.
        incsvfolder: folder holding the input file.
        outcsvfolder: folder receiving the randomised file.
        randomchoicevalues: iterable of label values to draw from.
    """
    df = IOtools.readcsv(os.path.join(incsvfolder, incsvfilename))
    randomdf = df.copy()
    numofrows = randomdf.shape[0]

    # random.choice needs an indexable sequence; a dict key view is not one.
    subjvalues = list(randomchoicevalues)
    randomdf.loc[:, "answer"] = [random.choice(subjvalues) for _ in range(numofrows)]

    # The count of 5 'ambiguous' rows is a tunable choice (statistical
    # validity); it is capped at the row count so small files do not make
    # random.sample raise ValueError.
    notknowingrows = random.sample(range(numofrows), min(5, numofrows))
    if notknowingrows:
        randomdf.loc[notknowingrows, "answer"] = 5

    IOtools.tocsv(randomdf, os.path.join(outcsvfolder, outcsvfilename))
示例8: get_allfolds_bigdf
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readcsv [as 别名]
def get_allfolds_bigdf(foldrootpath, annottype, featset, labelunion):
    """Merge the score files of all folds under ``foldrootpath`` into one CSV.

    Every sub-folder of ``foldrootpath`` is treated as one fold; its
    ``<scorefilename>.csv`` is read and tagged with the columns
    ``labelunion``, ``featureset``, ``annottype`` and ``fold``.  The stacked
    result is written to ``<foldrootpath>/bigdf.csv``.

    Args:
        foldrootpath: folder containing one sub-folder per fold.
        annottype: value stored in the ``annottype`` column.
        featset: value stored in the ``featureset`` column.
        labelunion: value stored in the ``labelunion`` column.
    """
    # Start from an empty frame carrying the standard performance columns so
    # the output has them even when there are no folds.
    frames = [pd.DataFrame(columns=metaexperimentation.performanceheader)]

    for foldno in IOtools.getfoldernames_of_dir(foldrootpath):
        p1 = os.path.join(foldrootpath, foldno)
        scorecsvfilepath = p1 + os.sep + metaexperimentation.scorefilename + ".csv"
        scorecsvfile = IOtools.readcsv(scorecsvfilepath)
        print(" scorefile  %s   %s" % (scorecsvfilepath, scorecsvfile.shape))

        rankdf = scorecsvfile.copy()
        rankdf["labelunion"] = labelunion
        rankdf["featureset"] = featset
        rankdf["annottype"] = annottype
        rankdf["fold"] = foldno
        frames.append(rankdf)

    # One concat instead of DataFrame.append per fold, which re-copied the
    # whole accumulated frame on every iteration.
    bigdf = pd.concat(frames)

    print("FOLDROOTPATH  %s" % (foldrootpath,))
    outcsvpath = os.path.join(foldrootpath, "bigdf.csv")
    IOtools.tocsv(bigdf, outcsvpath, False)
示例9: get_AllObj_AllSubj_class
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readcsv [as 别名]
def get_AllObj_AllSubj_class(originallabelspath, outfolder, in_NC=5):
    """Derive the 2-class "ALLobj-ALLsubj" label file from a label CSV.

    Reads ``originallabelspath`` and writes the recoded labels as
    ``<metacorpus.labelsfilename>.csv`` into the directory returned by
    ``ensure_unionclass_dir(outfolder, "ALLobj-ALLsubj", 2)``.

    Recoding: 1 and 2 become 12, 3 and 4 become 34, and every 5 is replaced
    by a random pick between 12 and 34.  (The original note reads
    ``12->"sub"; 34->"obj"``, i.e. the codes presumably stand for the merged
    subjective / objective classes.)

    Args:
        originallabelspath: path of the label CSV to recode.
        outfolder: root folder for the union-class output.
        in_NC: number of classes of the input labelling.

    Returns:
        None.  Nothing is read or written when ``in_NC <= 2``.
    """
    out_NC = 2
    if in_NC <= out_NC:
        return

    labeldf = IOtools.readcsv(originallabelspath, keepindex=True)
    outpath = os.path.join(ensure_unionclass_dir(outfolder, "ALLobj-ALLsubj", out_NC),
                           metacorpus.labelsfilename + ".csv")

    # Object dtype keeps the cells as plain Python values while recoding.
    labelvector = np.array(labeldf.values, dtype=object)
    labelvector[labelvector == 1] = 12
    labelvector[labelvector == 2] = 12
    labelvector[labelvector == 3] = 34
    labelvector[labelvector == 4] = 34

    # Resolve every ambiguous cell (5) independently.  An element-wise mask
    # is used instead of a per-row test so that label files with more than
    # one column work too; cells are visited in row-major order, so for a
    # single column the random draws happen in the same order as before.
    ambiguous = labelvector == 5
    n_ambiguous = int(np.count_nonzero(ambiguous))
    if n_ambiguous:
        labelvector[ambiguous] = [random.choice([12, 34]) for _ in range(n_ambiguous)]

    twolabeldf = pd.DataFrame(labelvector,
                              columns=labeldf.columns.values.tolist(),
                              index=labeldf.index.values.tolist())
    IOtools.tocsv(twolabeldf, outpath, keepindex=True)
示例10: tuple2matrix
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readcsv [as 别名]
def tuple2matrix(fileidlabelpairs, Xpath):
    """Turn ``(fileid, label)`` pairs into a feature matrix and label vector.

    The feature CSV at ``Xpath`` is read with its index kept, reduced to the
    rows named by the file ids (in the order given), and NaN / infinite cells
    are set to 0.  A warning line is printed when the row order of the
    selected frame does not match the requested file ids.

    Returns:
        tuple ``(X, y)``: the feature values and a numpy array of the labels.
    """
    unpacked = [(fileid, label) for fileid, label in fileidlabelpairs]
    fileids = [fileid for fileid, _ in unpacked]
    y = np.array([label for _, label in unpacked])

    datadf = IOtools.readcsv(Xpath, keepindex=True).loc[fileids, :]
    X = datadf.values
    X[np.isnan(X) | np.isinf(X)] = 0

    # check for fileid order
    instanceids = datadf.index.values.tolist()
    matching = sum(1 for got, wanted in zip(instanceids, fileids) if got == wanted)
    if matching != len(instanceids):
        print("FAAAAAAAAAAAAALLLLLLLLLLLLLSSSSSSSSSEEEEEEE")

    return X, y
示例11: apply_algorithms
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readcsv [as 别名]
def apply_algorithms(self, scorefilepath, labelnames=None):
# Fit the clustering models on the feature matrix at self.Xpath and report
# each model's predictions against self.ylabels via self.reportresults.
# NOTE(review): this listing lost its indentation and is cut off right after
# the loop below, so any trailing statements are not visible.
yvals = self.ylabels.copy().tolist()
print "y vals ",yvals
# Default label names are "cluster <value>" for every distinct label value;
# the order follows set iteration order and is therefore not guaranteed.
if labelnames is None:
labelnames = ["cluster "+str(i) for i in list(set(yvals))]
nclusters = 3 # we will change it
# `cluster` is presumably sklearn.cluster -- TODO confirm against the imports.
kmeans = cluster.KMeans(n_clusters=nclusters)
spectral = cluster.SpectralClustering(n_clusters=nclusters)
# NOTE(review): the models are appended to self.models on every call and the
# loop below runs over the whole list, so repeated calls accumulate models.
self.models.append(kmeans)
self.models.append(spectral)
datadf = IOtools.readcsv(self.Xpath, keepindex=True)
X = datadf.values
print "y sh ",self.ylabels.shape
print "X ",X[0]
print "apply clustering"
for model in self.models:
modelname = model.__class__.__name__
# Experiment name encodes the method, the algorithm class and the cluster count.
experimentname = "_MT-"+self.methodname+"_alg-"+modelname+"_nc-"+str(nclusters)
print "...",modelname
ytrue, ypred = self.ylabels, model.fit_predict(X)
self.reportresults(ytrue, ypred, experimentname, scorefilepath, labelnames)
'''
示例12: csv2latextable_algorithm
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readcsv [as 别名]
def csv2latextable_algorithm(inpath, outpath, filename, metricname):
    """Render per-algorithm score statistics as a LaTeX table.

    Reads ``<inpath>/<filename>.csv`` (index kept; columns ``mean``, ``min``
    and ``max`` are used) and writes a LaTeX ``table`` environment to
    ``<outpath>/<filename>.txt``.  Each row shows the row id without its
    first four characters, wrapped in ``\\verb|...|``, followed by the three
    statistics rounded to 5 decimals.

    Args:
        inpath: folder of the input CSV.
        outpath: folder receiving the ``.txt`` file.
        filename: base name shared by input and output file.
        metricname: metric label used in the column titles and the caption;
            its first four characters form part of the table label.
    """
    # The literals are spelled with valid escapes and implicit concatenation
    # (no backslash-newline inside a string), so the emitted text does not
    # depend on how the source lines are indented.
    header = ("\\begin{table}[h] \n "
              "\\begin{center} \n "
              "\\begin{tabular}{|p{9cm}|p{2cm}|p{2cm}|p{2cm}|} \n "
              "\\hline \\bf algorithm \\& parameters & \\bf mean " + metricname +
              " & \\bf minimum " + metricname +
              " & \\bf maximum " + metricname + " \\\\ \\hline")
    footer = ("\\end{tabular} \n "
              "\\end{center} \n "
              "\\caption{\\label{alg-" + metricname[:4] +
              "-stats} Mean, maximum and minimum " + metricname +
              " results for 27 learning models } \n "
              "\\end{table}")

    df = IOtools.readcsv(os.path.join(inpath, filename + ".csv"), keepindex=True)

    pieces = [header + "\n"]
    for rowid in df.index.values.tolist():
        # the first four characters of the row id are not shown
        label = "\\verb|" + rowid[4:] + "|"
        stats = [str(round(df.loc[rowid, column], 5)) for column in ("mean", "min", "max")]
        pieces.append(label + " & " + " & ".join(stats) + " \\\\ \\hline " + "\n")
    pieces.append(footer)

    IOtools.todisc_txt("".join(pieces), os.path.join(outpath, filename + ".txt"))
示例13: csv2latextable_featset
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readcsv [as 别名]
def csv2latextable_featset(inpath, outpath, filename, metricname):
    """Render per-feature-set score statistics as a LaTeX table.

    Reads ``<inpath>/<filename>.csv`` (index kept; columns ``mean``, ``min``
    and ``max`` are used) and writes a LaTeX ``table`` environment to
    ``<outpath>/<filename>.txt``.  Each row shows the part of the row id
    before the first ``**`` (stripped), wrapped in ``\\verb|...|``, followed
    by the three statistics rounded to 5 decimals.

    Args:
        inpath: folder of the input CSV.
        outpath: folder receiving the ``.txt`` file.
        filename: base name shared by input and output file.
        metricname: metric label used in the column titles and the caption;
            its first four characters form part of the table label.
    """
    # The literals are spelled with valid escapes and implicit concatenation
    # (no backslash-newline inside a string), so the emitted text does not
    # depend on how the source lines are indented.
    header = ("\\begin{table}[h] \n "
              "\\begin{center} \n "
              "\\begin{tabular}{|p{5cm}|p{2cm}|p{2cm}|p{2cm}|} \n "
              "\\hline \\bf feature-combined dataset name & \\bf mean " + metricname +
              " & \\bf minimum " + metricname +
              " & \\bf maximum " + metricname + " \\\\ \\hline")
    footer = ("\\end{tabular} \n "
              "\\end{center} \n "
              "\\caption{\\label{featset-" + metricname[:4] +
              "-stats} Mean, maximum and minimum " + metricname +
              " results for 8 feature-measure-combined datasets } \n "
              "\\end{table}")

    df = IOtools.readcsv(os.path.join(inpath, filename + ".csv"), keepindex=True)

    pieces = [header + "\n"]
    for rowid in df.index.values.tolist():
        # only the text before the first "**" of the row id is shown
        label = "\\verb|" + rowid.split("**")[0].strip() + "|"
        stats = [str(round(df.loc[rowid, column], 5)) for column in ("mean", "min", "max")]
        pieces.append(label + " & " + " & ".join(stats) + " \\\\ \\hline " + "\n")
    pieces.append(footer)

    IOtools.todisc_txt("".join(pieces), os.path.join(outpath, filename + ".txt"))
示例14: best_score_per_annottype
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readcsv [as 别名]
def best_score_per_annottype(self, metricname, scorepath=metaexperimentation.expscorepath):
# Gather score rows per annotation type from every score file below
# `scorepath` and write them to
# <self.resultspath>/<self.prefix>_score_per_annottype-<METRICNAME>.csv.
# Folder layout walked below:
#   scorepath/annottype/metricclass/combname/labelunion/fold/<scorefilename>.csv
# NOTE(review): this listing lost its indentation; the nesting level of the
# statements after the innermost loop cannot be confirmed from here.
bigdf = pd.DataFrame(columns=metaexperimentation.performanceheader)
#scorepath = os.path.join(self.experimentspath, "scores")
annottypes = IOtools.getfoldernames_of_dir(scorepath)
for annottype in annottypes:
# rows collected for this annotation type
annotdf = pd.DataFrame(columns=metaexperimentation.performanceheader)
p1 = os.path.join(scorepath, annottype)
#featcombnames = IOtools.getfoldernames_of_dir(p1) # list of combcode_NC names
metricclasses = IOtools.getfoldernames_of_dir(p1)
for metricclass in metricclasses:
p2 = os.path.join(p1, metricclass)
featcombnames = IOtools.getfoldernames_of_dir(p2)
for combname in featcombnames:
p3 = os.path.join(p2, combname)
labelunions = IOtools.getfoldernames_of_dir(p3)
for labelunion in labelunions:
p4 = os.path.join(p3, labelunion)
folds = IOtools.getfoldernames_of_dir(p4)
for fold in folds:
p5 = os.path.join(p4, fold)
scorecsvfilepath = p5 + os.sep + metaexperimentation.scorefilename+".csv"
scorecsvfile = IOtools.readcsv(scorecsvfilepath)
# drop clustering results as they are useless being not worked on (back validation missing)
scorecsvfile = scorecsvfile[np.logical_not(scorecsvfile.algorithm.str.startswith("_MT-Clustering"))]
# Presumably keeps the first N/2 rows ranked by `metricname`, worst-first when
# self.takeworst is set -- TODO confirm against matrixhelpers.get_first_N_rows.
rankdf = matrixhelpers.get_first_N_rows(scorecsvfile, int(self.N / 2), [metricname], ascend=self.takeworst)
print rankdf.shape
#annotdf.loc[:, rankdf.columns.values.tolist()] = rankdf.values.copy()
print " ** ",annotdf.shape
# Tag the selected rows with where they came from.
rankdf["labelunion"] = labelunion
rankdf["featureset"] = metricclass + " ** " + combname
rankdf["annottype"] = annottype
#dflist.append(rankdf)
annotdf = annotdf.append(rankdf)
print scorecsvfile.shape
# The per-annotation-type rows are cut down once more (to self.N rows,
# presumably) before being added to the overall frame.
annotdf = matrixhelpers.get_first_N_rows(annotdf, self.N, [metricname], ascend=self.takeworst)
bigdf = bigdf.append(annotdf)
# insert annottype as colname to bigdf. cutbigdf from the first 10.
# NOTE(review): DataFrame.sort is the legacy pandas spelling; newer pandas
# releases only provide sort_values -- confirm the pandas version in use.
bigdf.sort(["annottype", metricname], ascending=self.takeworst, inplace=True)
#resultantdf = matrixhelpers.get_first_N_rows(bigdf, self.N)
evaluationname = self.prefix+"_score_per_annottype-"+metricname.upper()
IOtools.tocsv(bigdf, os.path.join(self.resultspath, evaluationname+".csv"))
示例15: get_fold_averages_ablation
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import readcsv [as 别名]
def get_fold_averages_ablation():
# Walk the ablation score tree below the hard-coded root, stack the score
# files found for each exclusion folder into <exclusion folder>/bigdf.csv and
# call get_fold_averages on that folder.
# Folder layout walked below:
#   root/<ablationtype>/scores/<excname>/<annottype>/<featset>/<combname>/<labelunion>/<fold>/<scorefilename>.csv
# NOTE(review): this listing lost its indentation; the nesting level of the
# three closing statements cannot be confirmed from here (bigdf and p2 are
# both set per exclusion folder, which suggests they run once per excname).
ablationCVscoresroot = "/home/dicle/Dicle/Tez/corpusstats/learning11/ablation2/"
ablationtypes = ["item", "group", "onedim"]
annotationtypes = ["double"]
featsets = ["redef-rat_lex-rat"]
'''labelunions = ["EACHobj-EACHsubj","ALLobj-ALLsubj","ALLobj-STGsubj",
"STGobj-ALLsubj", "STGobj-STGsubj", "WKobj-WKsubj"]
'''
for ablationtype in ablationtypes:
print ablationtype
p1 = os.path.join(ablationCVscoresroot, ablationtype, "scores")
exclusionnames = IOtools.getfoldernames_of_dir(p1)
for excname in exclusionnames:
# fresh accumulator for every exclusion folder
bigdf = pd.DataFrame(columns=metaexperimentation.performanceheader)
p2 = os.path.join(p1, excname)
for annottype in annotationtypes:
p3 = os.path.join(p2, annottype)
for featset in featsets:
p4 = os.path.join(p3, featset)
combname = IOtools.getfoldernames_of_dir(p4)[0] # we know that there is only one folder
p5 = os.path.join(p4, combname)
# label unions are discovered from the folder names instead of the fixed
# list kept in the string above
labelunions = IOtools.getfoldernames_of_dir(p5)
for labelunion in labelunions:
p6 = os.path.join(p5, labelunion)
folds = IOtools.getfoldernames_of_dir(p6)
for foldno in folds:
p7 = os.path.join(p6, foldno)
scorecsvfilepath = p7 + os.sep + metaexperimentation.scorefilename+".csv"
scorecsvfile = IOtools.readcsv(scorecsvfilepath)
print " scorefile ",scorecsvfilepath," ",scorecsvfile.shape
#rankdf = matrixhelpers.get_first_N_rows(scorecsvfile, int(N / 2), metricnames, ascend=takeworst)
# every row of the score file is kept and tagged with where it came from
rankdf = scorecsvfile.copy()
rankdf["labelunion"] = labelunion
rankdf["featureset"] = featset + " ** " + combname
rankdf["annottype"] = annottype
rankdf["fold"] = foldno
#dflist.append(rankdf)
bigdf = bigdf.append(rankdf)
print bigdf.shape," ",p2
IOtools.tocsv(bigdf, os.path.join(p2, "bigdf.csv"))
get_fold_averages(p2)