本文整理汇总了Python中sentimentfinding.IOtools.tocsv方法的典型用法代码示例。如果您正苦于以下问题:Python IOtools.tocsv方法的具体用法?Python IOtools.tocsv怎么用?Python IOtools.tocsv使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sentimentfinding.IOtools
的用法示例。
在下文中一共展示了IOtools.tocsv方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_fold_averages_ablation
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def get_fold_averages_ablation():
    """Collect per-fold CV score files of the ablation experiments into one CSV
    per exclusion setting, then delegate averaging to get_fold_averages().

    Walks the fixed directory layout
    <root>/<ablationtype>/scores/<exclusion>/<annottype>/<featset>/<comb>/<labelunion>/<fold>/
    reading each fold's score CSV, tagging its rows with the experiment
    coordinates (labelunion, featureset, annottype, fold), concatenating them
    into <exclusion>/bigdf.csv and finally calling get_fold_averages() on each
    exclusion folder.
    """
    # NOTE(review): machine-specific hard-coded experiment root
    ablationCVscoresroot = "/home/dicle/Dicle/Tez/corpusstats/learning11/ablation2/"
    ablationtypes = ["item", "group", "onedim"]
    annotationtypes = ["double"]
    featsets = ["redef-rat_lex-rat"]
    '''labelunions = ["EACHobj-EACHsubj","ALLobj-ALLsubj","ALLobj-STGsubj",
    "STGobj-ALLsubj", "STGobj-STGsubj", "WKobj-WKsubj"]
    '''
    for ablationtype in ablationtypes:
        print ablationtype
        p1 = os.path.join(ablationCVscoresroot, ablationtype, "scores")
        exclusionnames = IOtools.getfoldernames_of_dir(p1)
        for excname in exclusionnames:
            # one accumulator dataframe per exclusion setting
            bigdf = pd.DataFrame(columns=metaexperimentation.performanceheader)
            p2 = os.path.join(p1, excname)
            for annottype in annotationtypes:
                p3 = os.path.join(p2, annottype)
                for featset in featsets:
                    p4 = os.path.join(p3, featset)
                    combname = IOtools.getfoldernames_of_dir(p4)[0]  # we know that there is only one folder
                    p5 = os.path.join(p4, combname)
                    labelunions = IOtools.getfoldernames_of_dir(p5)
                    for labelunion in labelunions:
                        p6 = os.path.join(p5, labelunion)
                        folds = IOtools.getfoldernames_of_dir(p6)
                        for foldno in folds:
                            p7 = os.path.join(p6, foldno)
                            scorecsvfilepath = p7 + os.sep + metaexperimentation.scorefilename+".csv"
                            scorecsvfile = IOtools.readcsv(scorecsvfilepath)
                            print " scorefile ",scorecsvfilepath," ",scorecsvfile.shape
                            #rankdf = matrixhelpers.get_first_N_rows(scorecsvfile, int(N / 2), metricnames, ascend=takeworst)
                            rankdf = scorecsvfile.copy()
                            # tag every score row with its experiment coordinates
                            rankdf["labelunion"] = labelunion
                            rankdf["featureset"] = featset + " ** " + combname
                            rankdf["annottype"] = annottype
                            rankdf["fold"] = foldno
                            #dflist.append(rankdf)
                            bigdf = bigdf.append(rankdf)
            print bigdf.shape," ",p2
            IOtools.tocsv(bigdf, os.path.join(p2, "bigdf.csv"))
            get_fold_averages(p2)
示例2: exclude_one_feature
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def exclude_one_feature(self):
    """Build combined feature matrices with one feature (or group) left out.

    For every exclusion setting in utils.get_excluded_features_map(), and for
    every combination code under it, reads the per-group feature CSVs selected
    by the combination row (skipping entries with featno < 0, i.e. the excluded
    feature) and joins them column-wise into
    <combinedfeaturesfolder>/<exclusion>/<featuregroup>/<combcode>.csv.
    """
    exclusionmap = utils.get_excluded_features_map()
    for exclusionname, featuremap in exclusionmap.iteritems():
        p1 = IOtools.ensure_dir(os.path.join(self.combinedfeaturesfolder, exclusionname))
        for featuregroup, combcodemap in featuremap.iteritems():
            p2 = IOtools.ensure_dir(os.path.join(p1, featuregroup))
            for combcode, row in combcodemap.iteritems():
                featuredflist = []
                for j,featno in enumerate(row):
                    print combcode[:8]," ",row, " featno= ",featno
                    # featno < 0 marks the excluded feature of this combination
                    if featno >= 0:
                        # feature groups are addressed by sorted-name position
                        groupname = sorted(self.featuremap.keys())[j]
                        print " -> ",groupname
                        extractorinstance = self.featuremap[groupname][featno]
                        # NOTE(review): presumably a path attribute, not a method call — confirm
                        featurematrixpath = extractorinstance.getfeaturematrixpath
                        featurematrix = IOtools.readcsv(featurematrixpath, keepindex=True)
                        featuredflist.append(featurematrix)
                datamatrix = pd.concat(featuredflist, axis=1) #, verify_integrity=True) # CLOSED DUE TO THE OVERLAPPING WORDS IN ABS AND SUBJ LISTS
                datamatrixpath = os.path.join(p2, combcode+".csv")
                IOtools.tocsv(datamatrix, datamatrixpath, keepindex=True)
示例3: best_score_per_annottype
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def best_score_per_annottype(self, metricname, scorepath=metaexperimentation.expscorepath):
    """Rank experiment scores per annotation type by *metricname* and write the
    result to <resultspath>/<prefix>_score_per_annottype-<METRIC>.csv.

    Walks scorepath/<annottype>/<metricclass>/<combname>/<labelunion>/<fold>/,
    reads each fold's score CSV, drops clustering rows, keeps the top
    (or bottom, if self.takeworst) N/2 rows per fold and the top N per
    annotation type, then sorts the concatenation by (annottype, metricname).
    """
    bigdf = pd.DataFrame(columns=metaexperimentation.performanceheader)
    #scorepath = os.path.join(self.experimentspath, "scores")
    annottypes = IOtools.getfoldernames_of_dir(scorepath)
    for annottype in annottypes:
        # per-annotation-type accumulator
        annotdf = pd.DataFrame(columns=metaexperimentation.performanceheader)
        p1 = os.path.join(scorepath, annottype)
        #featcombnames = IOtools.getfoldernames_of_dir(p1) # list of combcode_NC names
        metricclasses = IOtools.getfoldernames_of_dir(p1)
        for metricclass in metricclasses:
            p2 = os.path.join(p1, metricclass)
            featcombnames = IOtools.getfoldernames_of_dir(p2)
            for combname in featcombnames:
                p3 = os.path.join(p2, combname)
                labelunions = IOtools.getfoldernames_of_dir(p3)
                for labelunion in labelunions:
                    p4 = os.path.join(p3, labelunion)
                    folds = IOtools.getfoldernames_of_dir(p4)
                    for fold in folds:
                        p5 = os.path.join(p4, fold)
                        scorecsvfilepath = p5 + os.sep + metaexperimentation.scorefilename+".csv"
                        scorecsvfile = IOtools.readcsv(scorecsvfilepath)
                        # drop clustering results as they are useless being not worked on (back validation missing)
                        scorecsvfile = scorecsvfile[np.logical_not(scorecsvfile.algorithm.str.startswith("_MT-Clustering"))]
                        # keep only the N/2 best (or worst) rows of this fold
                        rankdf = matrixhelpers.get_first_N_rows(scorecsvfile, int(self.N / 2), [metricname], ascend=self.takeworst)
                        print rankdf.shape
                        #annotdf.loc[:, rankdf.columns.values.tolist()] = rankdf.values.copy()
                        print " ** ",annotdf.shape
                        # tag rows with their experiment coordinates
                        rankdf["labelunion"] = labelunion
                        rankdf["featureset"] = metricclass + " ** " + combname
                        rankdf["annottype"] = annottype
                        #dflist.append(rankdf)
                        annotdf = annotdf.append(rankdf)
                        print scorecsvfile.shape
        # reduce to the overall top N for this annotation type
        annotdf = matrixhelpers.get_first_N_rows(annotdf, self.N, [metricname], ascend=self.takeworst)
        bigdf = bigdf.append(annotdf)
    # insert annottype as colname to bigdf. cutbigdf from the first 10.
    bigdf.sort(["annottype", metricname], ascending=self.takeworst, inplace=True)
    #resultantdf = matrixhelpers.get_first_N_rows(bigdf, self.N)
    evaluationname = self.prefix+"_score_per_annottype-"+metricname.upper()
    IOtools.tocsv(bigdf, os.path.join(self.resultspath, evaluationname+".csv"))
示例4: assign_annotator_aggreement
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def assign_annotator_aggreement(doubleannotated_path, doubleannot_filename):
    """Derive agreement label files from a doubly-annotated CSV.

    Reads <doubleannotated_path>/<doubleannot_filename> (columns include
    answer1/answer2, one per annotator) and writes two CSVs next to it:
    - doubleannotated_fullagr4class.csv: rows where both answers are identical,
      labelled with that answer (4-class).
    - doubleannotated_halfagr2class.csv: rows where both answers fall on the
      same side — {1,2} -> 12 or {3,4} -> 34 (2-class).

    NOTE(review): assumes the source 'answer' column holds non-positive
    placeholders, since non-agreeing rows are filtered out with answer > 0 —
    confirm against the producer of this CSV.
    """
    csvpath = os.path.join(doubleannotated_path, doubleannot_filename)
    doubleannotatedcsv = IOtools.readcsv(csvpath)
    nrows, _ = doubleannotatedcsv.shape
    doubleannotated_full4class = doubleannotatedcsv.loc[:, ["questionname", "answer"]].copy()
    doubleannotated_half2class = doubleannotatedcsv.loc[:, ["questionname", "answer"]].copy()
    # get full agreed and half agreed annotations:
    for i in range(nrows):
        answer1 = doubleannotatedcsv.loc[i, "answer1"]
        answer2 = doubleannotatedcsv.loc[i, "answer2"]
        if answer1 == answer2:
            doubleannotated_full4class.loc[i, "answer"] = answer1
        # deliberately a separate 'if' (author's own "# elif?" doubt): exact
        # agreement on 1/2 or 3/4 also counts as half agreement below
        if answer1 in [1,2] and answer2 in [1,2]: # elif?
            doubleannotated_half2class.loc[i, "answer"] = 12
        elif answer1 in [3,4] and answer2 in [3,4]:
            doubleannotated_half2class.loc[i, "answer"] = 34
    # filtrate non-agreeing rows:
    doubleannotated_full4class = doubleannotated_full4class[doubleannotated_full4class["answer"] > 0]
    csvpath1 = os.path.join(doubleannotated_path, "doubleannotated_fullagr4class.csv")
    IOtools.tocsv(doubleannotated_full4class, csvpath1)
    doubleannotated_half2class = doubleannotated_half2class[doubleannotated_half2class["answer"] > 0]
    csvpath2 = os.path.join(doubleannotated_path, "doubleannotated_halfagr2class.csv")
    IOtools.tocsv(doubleannotated_half2class, csvpath2)
示例5: get_allfolds_bigdf
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def get_allfolds_bigdf(foldrootpath, annottype, featset, labelunion):
bigdf = pd.DataFrame(columns=metaexperimentation.performanceheader)
folds = IOtools.getfoldernames_of_dir(foldrootpath)
for foldno in folds:
p1 = os.path.join(foldrootpath, foldno)
scorecsvfilepath = p1 + os.sep + metaexperimentation.scorefilename+".csv"
scorecsvfile = IOtools.readcsv(scorecsvfilepath)
print " scorefile ",scorecsvfilepath," ",scorecsvfile.shape
#rankdf = matrixhelpers.get_first_N_rows(scorecsvfile, int(N / 2), metricnames, ascend=takeworst)
rankdf = scorecsvfile.copy()
rankdf["labelunion"] = labelunion
rankdf["featureset"] = featset
rankdf["annottype"] = annottype
rankdf["fold"] = foldno
bigdf = bigdf.append(rankdf)
#dflist.append(rankdf)
print "FOLDROOTPATH ",foldrootpath
outcsvpath = os.path.join(foldrootpath, "bigdf.csv")
IOtools.tocsv(bigdf, outcsvpath, False)
示例6: get_randomly_annotated_set
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def get_randomly_annotated_set(incsvfilename, outcsvfilename,
incsvfolder=metacorpus.userannotatedpath, outcsvfolder=metacorpus.randomannotatedpath,
randomchoicevalues=metacorpus.subjectivity_label_values.keys()):
    """Write a randomly-labelled copy of a user-annotated CSV (a random
    baseline): every 'answer' gets a random choice from *randomchoicevalues*,
    then 5 distinct rows are overwritten with answer 5 ('no idea, ambiguous').
    """
    # source cols: questionname, userid, answer
    source = IOtools.readcsv(os.path.join(incsvfolder, incsvfilename))
    randomised = source.copy()
    numofrows = randomised.values.shape[0]
    randomised.loc[:, "answer"] = [random.choice(randomchoicevalues) for _ in range(numofrows)]
    # extra: mark 5 distinct rows as 'no idea, ambiguous' (answer code 5);
    # be careful with this 5 number, it is subject to change for the sake of
    # statistical validity
    notknowingrows = random.sample(range(numofrows), 5)
    randomised.loc[notknowingrows, "answer"] = 5
    IOtools.tocsv(randomised, os.path.join(outcsvfolder, outcsvfilename))
示例7: get_AllObj_AllSubj_class
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def get_AllObj_AllSubj_class(originallabelspath, outfolder, in_NC=5):
    """Collapse an in_NC-class (default 5) subjectivity labelling to 2 classes.

    Labels 1/2 merge into 12 ("subj") and 3/4 into 34 ("obj"); label 5
    ('undecided') rows get a random side. The 2-class label file is written
    under the ALLobj-ALLsubj union folder. No-op when in_NC <= 2.
    """
    out_NC = 2
    if in_NC <= out_NC:
        return
    labeldf = IOtools.readcsv(originallabelspath, keepindex=True)
    outpath = os.path.join(ensure_unionclass_dir(outfolder, "ALLobj-ALLsubj", out_NC), metacorpus.labelsfilename + ".csv")
    labelvector = labeldf.values
    # object dtype so the merged codes can replace original values in place
    labelvector = np.array(labelvector, dtype=object)
    # merge 1,2 -> 12 ("subj") and 3,4 -> 34 ("obj")
    labelvector[labelvector == 1] = 12
    labelvector[labelvector == 2] = 12
    labelvector[labelvector == 3] = 34
    labelvector[labelvector == 4] = 34
    for i,_ in enumerate(labelvector):
        # NOTE(review): labelvector[i] is a row of the 2-D values array; the
        # scalar comparison assumes a single label column — confirm
        if labelvector[i] == 5:
            labelvector[i] = random.choice([12, 34])
    twolabeldf = pd.DataFrame(labelvector, columns=labeldf.columns.values.tolist(), index=labeldf.index.values.tolist())
    IOtools.tocsv(twolabeldf, outpath, keepindex=True)
示例8: combine_features
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def combine_features(self, combmatrix):
    """Concatenate per-group feature matrices into combined dataset CSVs.

    combmatrix has one row per combination; column j selects which extractor
    of feature group j to use (groups addressed by sorted-name position).
    Each combination's selected CSVs are joined column-wise and written as
    comb<i>_F_<groupno>-<featno>_... .csv under self.combinedfeaturesfolder;
    the decoded combination name is recorded as a txt file alongside.
    """
    # NOTE(review): shape is (ncombs, number of feature groups); the name
    # 'nrows' for the second dimension is misleading but unused
    ncombs, nrows = combmatrix.shape
    for i,row in enumerate(combmatrix):
        filename = "comb"+str(i)+"_F"
        featuredflist = []
        for j,featno in enumerate(row):
            groupname = sorted(self.featuremap.keys())[j]
            filename += "_"+str(j)+"-"+str(featno) # filename = combNO_F_GROUPNO-FEATNO
            extractorinstance = self.featuremap[groupname][featno]
            # NOTE(review): presumably a path attribute, not a method call — confirm
            featurematrixpath = extractorinstance.getfeaturematrixpath
            featurematrix = IOtools.readcsv(featurematrixpath, keepindex=True)
            featuredflist.append(featurematrix)
        print filename
        print utils.decode_combcode(filename, self.featuremap)
        datamatrix = pd.concat(featuredflist, axis=1) #, verify_integrity=True) # CLOSED DUE TO THE OVERLAPPING WORDS IN ABS AND SUBJ LISTS
        #datamatrix['index'] = datamatrix.index
        #datamatrix = datamatrix.drop_duplicates(cols='index')
        #del datamatrix['index']
        # replace nan and inf cells !! no. work on matrix, not df. better do this change on learning
        #datamatrix[np.isnan(datamatrix)] = 0
        #datamatrix[np.isinf(datamatrix)] = 0
        datamatrixpath = self.combinedfeaturesfolder + os.sep + filename + ".csv"
        IOtools.tocsv(datamatrix, datamatrixpath, keepindex=True)
        # record comb name decoding
        decodednamesfolder = IOtools.ensure_dir(os.path.join(self.datasetrootpath, metacorpus.decodedcombnamesfoldername))
        decodedname = utils.tostr_decoded_combcode(filename, self.featuremap)
        IOtools.todisc_txt(decodedname, os.path.join(decodednamesfolder, filename+".txt"))
示例9: get_2_classes
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def get_2_classes(labelrootpath, taggertype, in_NC=5):
    """Collapse an in_NC-class (default 5) label file of *taggertype* to 2 classes.

    Reads <labelrootpath>/NC<sep><in_NC>/<taggertype>.csv, merges labels
    1/2 -> 12 ("subj") and 3/4 -> 34 ("obj"), assigns label-5 ('undecided')
    rows a random side, and writes the result under the NC-2 folder.
    No-op when in_NC <= 2.
    """
    out_NC = 2
    if in_NC <= out_NC:
        return
    originallabelspath = os.path.join(labelrootpath, "NC"+metaexperimentation.intrafeatsep+str(in_NC), taggertype+".csv")
    labeldf = IOtools.readcsv(originallabelspath, keepindex=True)
    outlabelspath = os.path.join(ensure_nclass_dir(labelrootpath, out_NC), taggertype+".csv")
    labelvector = labeldf.values
    # object dtype so the merged codes can replace original values in place
    labelvector = np.array(labelvector, dtype=object)
    # merge 1,2 -> 12 ("subj") and 3,4 -> 34 ("obj")
    labelvector[labelvector == 1] = 12
    labelvector[labelvector == 2] = 12
    labelvector[labelvector == 3] = 34
    labelvector[labelvector == 4] = 34
    for i,_ in enumerate(labelvector):
        # NOTE(review): labelvector[i] is a row of the 2-D values array; the
        # scalar comparison assumes a single label column — confirm
        if labelvector[i] == 5:
            labelvector[i] = random.choice([12, 34])
    twolabeldf = pd.DataFrame(labelvector, columns=labeldf.columns.values.tolist(), index=labeldf.index.values.tolist())
    IOtools.tocsv(twolabeldf, outlabelspath, keepindex=True)
示例10: calculate_features
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def calculate_features(self):
    """Extract the ADJ column of the POS-tag count matrix at self.inputpath
    as a single-column feature named self.fname and write it to
    self.recordpath."""
    source_df = IOtools.readcsv(self.inputpath, keepindex=True)
    adjective_counts = source_df.loc[:, "ADJ"].values
    feature_df = pd.DataFrame(adjective_counts,
                              index=source_df.index.values.tolist(),
                              columns=[self.fname])
    IOtools.tocsv(feature_df, self.recordpath, keepindex=True)
示例11: find_annotator_disagreements
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def find_annotator_disagreements(df, outpath):
    """Count, per annotator, disagreements with the majority vote and the gold
    label, and write a summary CSV to *outpath*.

    For every non-comment sentence row, the six "MAj_workerid" cells (format
    "<workerid><ANSWERCELL_SEP><answer>") are compared against the majority
    answer of their category (annotators 0-2 -> cat1, 3-5 -> cat2) and the
    GOLD answer. The output has one row per annotator with raw counts and
    counts normalised by that annotator's number of annotations (-1 when the
    annotator made none).
    """
    annotators = get_annotator_names(df)
    majority_disagr = initialize_map(annotators)
    gold_disagr = initialize_map(annotators)
    numofannotations = initialize_map(annotators)
    indices = df.index.tolist()
    for i in indices:
        gold_ans = df.loc[i, "GOLD"]
        sentence = df.loc[i, "sentenceid"]
        # comment rows carry free text, not label answers — skip them
        if not sentence.endswith(comment_suffix):
            for j1 in range(0, 3): # compare cat1 answers
                major_ans = df.loc[i, mvote_colname+"cat1"]
                workeranswerpair = df.loc[i, "MA"+str(j1)+"_workerid"]
                print workeranswerpair
                items = workeranswerpair.split(ANSWERCELL_SEP)
                worker = items[0].strip()
                answer = items[1].strip()
                print answer," ",major_ans," ",gold_ans
                if answer != major_ans:
                    majority_disagr[worker] = majority_disagr[worker] + 1
                if answer != gold_ans:
                    gold_disagr[worker] = gold_disagr[worker] + 1
                numofannotations[worker] = numofannotations[worker] + 1
            for j1 in range(3, 6): # compare cat2 answers
                major_ans = df.loc[i, mvote_colname+"cat2"]
                workeranswerpair = df.loc[i, "MA"+str(j1)+"_workerid"]
                items = workeranswerpair.split(ANSWERCELL_SEP)
                worker = items[0].strip()
                answer = items[1].strip()
                print answer," ",major_ans," ",gold_ans
                if answer != major_ans:
                    majority_disagr[worker] = majority_disagr[worker] + 1
                if answer != gold_ans:
                    gold_disagr[worker] = gold_disagr[worker] + 1
                numofannotations[worker] = numofannotations[worker] + 1
    # assemble the per-annotator summary table
    cols = ["annotatorid", "nMajorityDisagr", "nGoldDisagr", "nAnnotations", "weightedMajorityDisagr", "weightedGoldDisagr"]
    matrix = np.zeros([len(annotators), len(cols)], dtype=object)
    adf = pd.DataFrame(matrix, index=range(len(annotators)), columns=cols)
    for i,worker in enumerate(annotators):
        adf.loc[i, "annotatorid"] = worker
        adf.loc[i, "nMajorityDisagr"] = majority_disagr[worker]
        adf.loc[i, "nGoldDisagr"] = gold_disagr[worker]
        adf.loc[i, "nAnnotations"] = numofannotations[worker]
        if numofannotations[worker] == 0:
            # -1 sentinel: no annotations, ratio undefined
            adf.loc[i, "weightedMajorityDisagr"] = -1
            adf.loc[i, "weightedGoldDisagr"] = -1
        else:
            adf.loc[i, "weightedMajorityDisagr"] = round(majority_disagr[worker] / float(numofannotations[worker]), 4)
            adf.loc[i, "weightedGoldDisagr"] = round(gold_disagr[worker] / float(numofannotations[worker]), 4)
    IOtools.tocsv(adf, outpath)
示例12: content_adjectivecount
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def content_adjectivecount(self):
    """Extract the ADJ column of the content POS-tag count matrix as a
    single-column feature and write it to content-adjectivecount.csv."""
    featurename = "content-adjectivecount"
    source_path = os.path.join(self.inmatrixfolder, "content-postagCOUNT.csv")
    target_path = os.path.join(self.outmatrixfolder, featurename + ".csv")
    postag_counts = IOtools.readcsv(source_path, keepindex=True)
    adjective_counts = postag_counts.loc[:, "ADJ"].values
    feature_df = pd.DataFrame(adjective_counts,
                              index=postag_counts.index.values.tolist(),
                              columns=[featurename])
    IOtools.tocsv(feature_df, target_path, keepindex=True)
示例13: title_abstractwords_presence
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def title_abstractwords_presence(self):
    """Binarise the title-term count matrix (1 if a term occurs at all),
    restrict it to the abstractness keyword list, and write the result to
    title-abswordsBINARY.csv."""
    source_path = os.path.join(self.inmatrixfolder, "titletermCOUNT.csv")
    target_path = os.path.join(self.outmatrixfolder, "title-abswordsBINARY.csv")
    abstractwords = keywordhandler.get_abstractwords()
    countdf = IOtools.readcsv(source_path, keepindex=True)
    counts = countdf.values
    # map counts to presence values (1 if count > 0 else 0)
    counts[counts > 0] = 1
    presencedf = pd.DataFrame(counts,
                              index=countdf.index.values.tolist(),
                              columns=countdf.columns.values.tolist())
    filtered = matrixhelpers.search_words_in_df(presencedf, abstractwords)
    IOtools.tocsv(filtered, target_path, keepindex=True)
示例14: get_annotation_matrix
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def get_annotation_matrix(df, outfolder,
noworkerid=True,
includehitid=True,
nMturkAnnotators=7, nExpertAnnotators=2,
nMturkFinalCol=1, nExperFinalCol=1,
remove_comments=False):
    """Pivot raw HIT rows into one annotation row per (docid, sentenceid).

    Output columns: docid, sentenceid, MA0..MA<n-1> (mturk answers),
    Mturk_final, EA0..EA<m-1> (expert answers), GOLD. Expert/final columns
    are zero-filled placeholders. Mturk answer cells are formatted according
    to the flags: plain answer when noworkerid; when includehitid is also set,
    "<hitid><SEP><answer>" for regular sentences but "<workerid><SEP><answer>"
    for comment rows. The table is written to
    <outfolder>/annotationdf_whitid_wcomments_noworkerid.csv.
    NOTE(review): nMturkFinalCol / nExperFinalCol are accepted but unused.
    """
    filesentencemap = get_file_sentence_map(df, remove_comments)
    colnames = ["docid", "sentenceid"]
    mturkercols = ["MA"+str(i) for i in range(nMturkAnnotators)]
    mturk_final_col = ["Mturk_final"]
    expertcols = ["EA"+str(i) for i in range(nExpertAnnotators)]
    expert_final_col = ["GOLD"]
    colnames += mturkercols + mturk_final_col + expertcols + expert_final_col
    matrix = []
    for filename, sentences in filesentencemap.iteritems():
        filedf = df[["hitid", "workerid"] + sentences] # annotations for the sentences of one file
        filedf = filedf.dropna(axis=0, how="all", subset=sentences) # clear unrelated rows, those contain no annotations for these sentences
        filedf = reindex_df(filedf)
        filedf = filedf.fillna(value="NaN")
        for sentenceid in sentences:
            # strip the answer prefix from file and sentence identifiers
            fileid = filename[len(ANSWER_PREFIX)-1:]
            trimmed_sentenceid = sentenceid[len(ANSWER_PREFIX)-1:]
            line = [fileid, trimmed_sentenceid]
            mturk_answers = []
            nannotators = filedf.shape[0]
            for i in range(nannotators):
                annotatorid = filedf.loc[i, "workerid"]
                hitid = filedf.loc[i, "hitid"]
                answer = filedf.loc[i, sentenceid]
                # raw cell looks like "..._<answer>"; keep the last segment
                answer = answer.split("_")[-1]
                print "types: ", type(annotatorid), " ", type(answer), " ", answer
                # default cell format: "<workerid><SEP><answer>"
                answercell = annotatorid + ANSWERCELL_SEP + answer
                if noworkerid:
                    answercell = answer
                if includehitid and noworkerid:
                    if sentenceid.endswith(comment_suffix):
                        # comment rows keep the worker id for traceability
                        answercell = annotatorid + ANSWERCELL_SEP + answer
                    #elif sentenceid.endswith(nonsense_suffix):
                    #answercell = answer
                    else:
                        answercell = hitid + ANSWERCELL_SEP + answer
                mturk_answers.append(answercell)
            line.extend(mturk_answers)
            line.extend(np.zeros(len(colnames)-len(line), dtype=int).tolist()) # fill 0's for the yet unknown cols (expert labels)
            matrix.append(line)
    annotationdf = pd.DataFrame(matrix, columns=colnames)
    outcsvpath = os.path.join(outfolder, "annotationdf_whitid_wcomments_noworkerid.csv")
    IOtools.tocsv(annotationdf, outcsvpath)
示例15: insert_texts
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import tocsv [as 别名]
def insert_texts(df, sourcedf, outfilepath):
sindices = sourcedf.index.tolist()
c = 0
for i in sindices:
docid = sourcedf.loc[i, "docid"]
sentenceid = sourcedf.loc[i, "sentenceid"]
label = sourcedf.loc[i, "goldlabel"]
text = sourcedf.loc[i, "text"]
df.loc[(df["docid"] == docid) & (df["sentenceid"] == sentenceid), "text"] = text
df.loc[(df["docid"] == docid) & (df["sentenceid"] == sentenceid), "GOLD"] = label
if len(df.loc[(df["docid"] == docid) & (df["sentenceid"] == sentenceid), "GOLD"]) != 0:
c += 1
print c, " found"
IOtools.tocsv(df, outfilepath)