本文整理汇总了Python中sentimentfinding.IOtools.getfoldernames_of_dir方法的典型用法代码示例。如果您正苦于以下问题:Python IOtools.getfoldernames_of_dir方法的具体用法?Python IOtools.getfoldernames_of_dir怎么用?Python IOtools.getfoldernames_of_dir使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sentimentfinding.IOtools的用法示例。
在下文中一共展示了IOtools.getfoldernames_of_dir方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: best_score_per_annottype
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import getfoldernames_of_dir [as 别名]
def best_score_per_annottype(self, metricname, scorepath=metaexperimentation.expscorepath):
    """Rank score rows per annotation type and write the merged table to CSV.

    The score tree is walked as
    ``scorepath/<annottype>/<metricclass>/<combname>/<labelunion>/<fold>``;
    the score CSV named by ``metaexperimentation.scorefilename`` is read from
    every fold folder.  Rows whose ``algorithm`` starts with "_MT-Clustering"
    are dropped, the rest is passed to ``matrixhelpers.get_first_N_rows`` with
    ``int(self.N / 2)``, and the per-annotation-type accumulation is passed
    through the same helper again with ``self.N``.  The merged frame is saved
    under ``self.resultspath``.

    NOTE(review): the listing this was recovered from had lost all
    indentation; the nesting below is inferred from the data flow and should
    be confirmed against the original repository.

    metricname -- score column handed to the ranking helper and used for sorting
    scorepath  -- root folder of the score tree
    """
    merged = pd.DataFrame(columns=metaexperimentation.performanceheader)

    for annottype in IOtools.getfoldernames_of_dir(scorepath):
        per_annot = pd.DataFrame(columns=metaexperimentation.performanceheader)
        annot_dir = os.path.join(scorepath, annottype)

        for metricclass in IOtools.getfoldernames_of_dir(annot_dir):
            metric_dir = os.path.join(annot_dir, metricclass)

            for combname in IOtools.getfoldernames_of_dir(metric_dir):
                comb_dir = os.path.join(metric_dir, combname)

                for labelunion in IOtools.getfoldernames_of_dir(comb_dir):
                    union_dir = os.path.join(comb_dir, labelunion)

                    for fold in IOtools.getfoldernames_of_dir(union_dir):
                        fold_dir = os.path.join(union_dir, fold)
                        csvpath = fold_dir + os.sep + metaexperimentation.scorefilename + ".csv"
                        scores = IOtools.readcsv(csvpath)

                        # Clustering results were never back-validated, so
                        # they are left out of the ranking.
                        is_clustering = scores.algorithm.str.startswith("_MT-Clustering")
                        scores = scores[np.logical_not(is_clustering)]

                        ranked = matrixhelpers.get_first_N_rows(
                            scores, int(self.N / 2), [metricname], ascend=self.takeworst)
                        print(ranked.shape)
                        print("%s %s" % (" ** ", per_annot.shape))

                        # Tag every row with where in the tree it came from.
                        ranked["labelunion"] = labelunion
                        ranked["featureset"] = metricclass + " ** " + combname
                        ranked["annottype"] = annottype
                        per_annot = per_annot.append(ranked)
                        print(scores.shape)

        per_annot = matrixhelpers.get_first_N_rows(
            per_annot, self.N, [metricname], ascend=self.takeworst)
        merged = merged.append(per_annot)

    # NOTE(review): DataFrame.sort only exists in old pandas releases; newer
    # ones spell this sort_values -- confirm the pandas version in use.
    merged.sort(["annottype", metricname], ascending=self.takeworst, inplace=True)

    evaluationname = self.prefix + "_score_per_annottype-" + metricname.upper()
    IOtools.tocsv(merged, os.path.join(self.resultspath, evaluationname + ".csv"))
示例2: evaluate_crosscorpus
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import getfoldernames_of_dir [as 别名]
def evaluate_crosscorpus(scoresroot):
    """Merge and average the fold scores of every cross-corpus experiment.

    Walks ``scoresroot/<featureclass>/<labelunion>/<testcase>/<traincase>``
    and, for each innermost folder, calls ``get_allfolds_bigdf`` followed by
    ``get_fold_averages`` on it.  The annotation-type label passed along is
    ``"<testcase> ** <traincase>"``.

    scoresroot -- root folder of the cross-corpus score tree
    """
    for featureclass in IOtools.getfoldernames_of_dir(scoresroot):
        feature_dir = os.path.join(scoresroot, featureclass)

        for labelunion in IOtools.getfoldernames_of_dir(feature_dir):
            union_dir = os.path.join(feature_dir, labelunion)

            for testcase in IOtools.getfoldernames_of_dir(union_dir):
                test_dir = os.path.join(union_dir, testcase)

                for traincase in IOtools.getfoldernames_of_dir(test_dir):
                    # This folder holds one sub-folder per fold.
                    folds_dir = os.path.join(test_dir, traincase)
                    corpus_pair = testcase + " ** " + traincase
                    get_allfolds_bigdf(foldrootpath=folds_dir,
                                       annottype=corpus_pair,
                                       featset=featureclass,
                                       labelunion=labelunion)
                    get_fold_averages(folds_dir)
示例3: get_fold_averages_ablation
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import getfoldernames_of_dir [as 别名]
def get_fold_averages_ablation(
        ablationCVscoresroot="/home/dicle/Dicle/Tez/corpusstats/learning11/ablation2/",
        ablationtypes=("item", "group", "onedim"),
        annotationtypes=("double",),
        featsets=("redef-rat_lex-rat",)):
    """Merge the per-fold score files of every ablation run and average them.

    For each ``<root>/<ablationtype>/scores/<excname>`` folder the score CSVs
    found under ``<annottype>/<featset>/<combname>/<labelunion>/<fold>`` are
    concatenated into one frame, written to ``<excname>/bigdf.csv`` and then
    handed to ``get_fold_averages``.

    All parameters default to the values that used to be hard-coded in the
    body, so calling the function without arguments behaves as before.

    ablationCVscoresroot -- root folder of the ablation experiments
    ablationtypes        -- ablation sub-folders to visit
    annotationtypes      -- annotation-type folder names to visit
    featsets             -- feature-set folder names to visit
    """
    for ablationtype in ablationtypes:
        print(ablationtype)
        scores_dir = os.path.join(ablationCVscoresroot, ablationtype, "scores")

        for excname in IOtools.getfoldernames_of_dir(scores_dir):
            bigdf = pd.DataFrame(columns=metaexperimentation.performanceheader)
            exc_dir = os.path.join(scores_dir, excname)

            for annottype in annotationtypes:
                for featset in featsets:
                    featset_dir = os.path.join(exc_dir, annottype, featset)
                    # Exactly one feature-combination folder is expected here;
                    # an empty folder raises IndexError, as it always did.
                    combname = IOtools.getfoldernames_of_dir(featset_dir)[0]
                    comb_dir = os.path.join(featset_dir, combname)

                    for labelunion in IOtools.getfoldernames_of_dir(comb_dir):
                        union_dir = os.path.join(comb_dir, labelunion)

                        for foldno in IOtools.getfoldernames_of_dir(union_dir):
                            fold_dir = os.path.join(union_dir, foldno)
                            scorecsvfilepath = (fold_dir + os.sep
                                                + metaexperimentation.scorefilename + ".csv")
                            scorecsvfile = IOtools.readcsv(scorecsvfilepath)
                            print("%s %s %s %s" % (" scorefile ", scorecsvfilepath,
                                                   " ", scorecsvfile.shape))

                            # Work on a copy so the frame read from disk is
                            # not modified by the tagging columns.
                            rankdf = scorecsvfile.copy()
                            rankdf["labelunion"] = labelunion
                            rankdf["featureset"] = featset + " ** " + combname
                            rankdf["annottype"] = annottype
                            rankdf["fold"] = foldno
                            bigdf = bigdf.append(rankdf)

            print("%s %s %s" % (bigdf.shape, " ", exc_dir))
            IOtools.tocsv(bigdf, os.path.join(exc_dir, "bigdf.csv"))
            get_fold_averages(exc_dir)
示例4: get_resourcecatmap
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import getfoldernames_of_dir [as 别名]
def get_resourcecatmap(corpuspath=metacorpus.rawcorpuspath):
    """Map every resource folder to its ``"<resource>-<category>"`` labels.

    corpuspath -- folder whose sub-folders are resources, each of which
                  contains one sub-folder per category

    Returns a dict ``{resource: ["<resource>-<cat>", ...]}`` in the order the
    folder listing yields the categories.
    """
    catmap = {}
    for resource in IOtools.getfoldernames_of_dir(corpuspath):
        resource_dir = os.path.join(corpuspath, resource)
        catmap[resource] = [resource + "-" + cat
                            for cat in IOtools.getfoldernames_of_dir(resource_dir)]
    return catmap
示例5: get_allfolds_bigdf
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import getfoldernames_of_dir [as 别名]
def get_allfolds_bigdf(foldrootpath, annottype, featset, labelunion):
    """Concatenate the score files of all folds into ``<foldrootpath>/bigdf.csv``.

    Every sub-folder of *foldrootpath* is treated as one fold whose score CSV
    (named by ``metaexperimentation.scorefilename``) is read, tagged with the
    given labels plus the fold name, and appended to one frame.

    foldrootpath -- folder containing one sub-folder per fold
    annottype    -- value written to the "annottype" column
    featset      -- value written to the "featureset" column
    labelunion   -- value written to the "labelunion" column
    """
    merged = pd.DataFrame(columns=metaexperimentation.performanceheader)

    for foldno in IOtools.getfoldernames_of_dir(foldrootpath):
        fold_dir = os.path.join(foldrootpath, foldno)
        csvpath = fold_dir + os.sep + metaexperimentation.scorefilename + ".csv"
        scores = IOtools.readcsv(csvpath)
        print("%s %s %s %s" % (" scorefile ", csvpath, " ", scores.shape))

        # Tag a copy so the frame read from disk stays untouched.
        tagged = scores.copy()
        tagged["labelunion"] = labelunion
        tagged["featureset"] = featset
        tagged["annottype"] = annottype
        tagged["fold"] = foldno
        merged = merged.append(tagged)

    print("%s %s" % ("FOLDROOTPATH ", foldrootpath))
    IOtools.tocsv(merged, os.path.join(foldrootpath, "bigdf.csv"), False)
示例6: evaluate_featureexcluded_datasets
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import getfoldernames_of_dir [as 别名]
def evaluate_featureexcluded_datasets(
        rootpath="/home/dicle/Dicle/Tez/corpusstats/learningdata_excludeone/experiments/",
        metrics=("accuracy", "fscore", "precision", "recall")):
    """Compute score statistics for every feature-exclusion experiment.

    Each sub-folder of ``<rootpath>/scores`` names one exclusion experiment.
    For each of them a ``PerformanceEvaluator`` is created with
    ``expspath=<rootpath>/<exclusionname>`` and ``takeworst=True`` and its
    ``score_stats`` is called once per metric.

    Both parameters default to the values that used to be hard-coded in the
    body, so calling the function without arguments behaves as before.

    rootpath -- experiments folder containing the "scores" sub-folder
    metrics  -- metric names passed to ``score_stats``
    """
    scorespath = os.path.join(rootpath, "scores")

    for exclusionname in IOtools.getfoldernames_of_dir(scorespath):
        rankpath = os.path.join(rootpath, exclusionname)
        inputscorespath = os.path.join(scorespath, exclusionname)
        evaluator = PerformanceEvaluator(expspath=rankpath, takeworst=True)
        for metric in metrics:
            evaluator.score_stats(metricname=metric, scorepath=inputscorespath)
示例7: add_resource_label
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import getfoldernames_of_dir [as 别名]
def add_resource_label(matrixpath, datasetname, replacelabel=False, headers=True):
    """Append the news resource name to every row of a text matrix.

    Each matrix line is whitespace-separated; its first item is a file id and
    its last item the file category.  The file id is looked up in the
    sub-folders of ``datapath/<datasetname>``; when found, the "resource"
    field of that file's metadata is appended to the row.  Rows whose file id
    is not found in any folder are left out of the result.

    matrixpath   -- path of the matrix text file
    datasetname  -- dataset folder below the module-level ``datapath``
    replacelabel -- if true, the last item of the row is dropped before the
                    resource name is appended
    headers      -- if true, the first two lines of the file are skipped

    Returns a list of rows, each a list of strings.
    """
    matrixlines = IOtools.readtextlines(matrixpath)
    if headers:
        matrixlines = matrixlines[2:]

    newmatrix = []
    if not matrixlines:
        # Nothing to label, so the dataset folders are not touched at all.
        return newmatrix

    datasetpath = datapath + os.sep + datasetname

    # Index the dataset once instead of re-listing every folder for every
    # row.  setdefault keeps the first folder that contains a file id, which
    # is the folder the former per-row scan would have stopped at.
    folder_of = {}
    for folder in IOtools.getfoldernames_of_dir(datasetpath):
        folderfiles = IOtools.getfilenames_of_dir(datasetpath + os.sep + folder,
                                                  removeextension=False)
        for name in folderfiles:
            folder_of.setdefault(name, folder)

    for instance in matrixlines:
        # Progress echo, one row per line.
        print(instance.rstrip("\n"))
        items = instance.split()
        if not items:
            # Blank line: there is no file id to look up.
            continue

        fileid = items[0]
        folder = folder_of.get(fileid)
        if folder is None:
            continue

        newspath = datasetpath + os.sep + folder + os.sep + fileid
        resourcename = texter.getnewsmetadata(newspath, ["resource"])["resource"]
        if replacelabel:
            items = items[:-1]
        newmatrix.append(items + [resourcename])

    return newmatrix
示例8: buildcorpus
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import getfoldernames_of_dir [as 别名]
def buildcorpus(nfile, ncat, resourcename, path):
    """Extract features for part of one resource and append them to a matrix file.

    The first *ncat* category folders of ``path/<resourcename>`` are visited
    and the first *nfile* files of each are handed to ``CorpusFeatures``.
    One row ``[fileid] + features + [resourcename]`` is collected per file and
    all rows are appended to ``MATRIX<nfile*ncat>texts.txt`` under
    ``IOtools.results_rootpath``.

    NOTE(review): indentation was lost in the listing; ``plot_features`` and
    the term-matrix loop are placed once per category and the final write
    once per call -- confirm against the original repository.

    nfile        -- maximum number of files taken per category
    ncat         -- maximum number of categories taken
    resourcename -- resource folder name below *path*
    path         -- corpus root folder
    """
    resourcepath = path + os.sep + resourcename
    catnames = IOtools.getfoldernames_of_dir(resourcepath)[:ncat]

    featurematrix = []
    # Term counts per document; filled below but not returned or stored.
    term_doc_counts = nltk.ConditionalFreqDist()

    for catname in catnames:
        cat_dir = resourcepath + os.sep + catname + os.sep
        fileids = list(IOtools.getfilenames_of_dir(cat_dir, removeextension=False)[:nfile])

        corpus = CorpusFeatures(fileids, resourcename + os.sep + catname, cat_dir)
        corpus.getfeatures()

        datapoints = corpus.build_featurematrix()
        featurematrix.extend([fileid] + values + [resourcename]
                             for fileid, values in datapoints.iteritems())
        corpus.plot_features()

        # document-term matrix
        cfd = corpus.build_termmatrix()
        for fileid in cfd.conditions():
            for term in list(cfd[fileid]):
                term_doc_counts[fileid].inc(term)

    outname = "MATRIX" + str(nfile * ncat) + "texts.txt"
    IOtools.todisc_matrix(featurematrix,
                          IOtools.results_rootpath + os.sep + outname,
                          mode="a")
示例9: evaluate_crossfeatures
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import getfoldernames_of_dir [as 别名]
def evaluate_crossfeatures(scoresroot):
    """Merge and average the fold scores of every cross-feature experiment.

    Walks ``scoresroot/<featureclass>/<labelunion>`` and, for each innermost
    folder, calls ``get_allfolds_bigdf`` followed by ``get_fold_averages``.
    The feature-class name is passed both as annotation type and as feature
    set.

    scoresroot -- root folder of the cross-feature score tree
    """
    for featureclass in IOtools.getfoldernames_of_dir(scoresroot):
        feature_dir = os.path.join(scoresroot, featureclass)

        for labelunion in IOtools.getfoldernames_of_dir(feature_dir):
            # This folder holds one sub-folder per fold.
            folds_dir = os.path.join(feature_dir, labelunion)
            get_allfolds_bigdf(foldrootpath=folds_dir,
                               annottype=featureclass,
                               featset=featureclass,
                               labelunion=labelunion)
            get_fold_averages(folds_dir)
示例10: conduct_experiments
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import getfoldernames_of_dir [as 别名]
def conduct_experiments(inrootpath=metacorpus.learningdatapath, outrootpath=metaexperimentation.expscorepath, normalize=False):
    """Run every model on every dataset / label-grouping pair and record scores.

    The output folder tree
    ``outrootpath/<annottype>/<setsize>/<combfile>/<nclass>/<taggertype>`` is
    created with ``IOtools.ensure_dir`` while iterating; a score file is
    initialised in each leaf folder before the models run there.

    inrootpath  -- not used by the body
    outrootpath -- root folder under which the score folders are created
    normalize   -- forwarded to every model's ``prepare_experiment``
    """
    annottypes = ["double"]
    setsizes = ["150"]
    taggertypes = ["user"]
    numofcombs = 5  # only the first few feature-combination files are used

    # Models are constructed in this order and run in the order listed below.
    svmclassifier = SVM("")
    clusterer = Clustering("")
    nbclassifier = NaiveBayes("")
    models = [svmclassifier, nbclassifier, clusterer]

    for annotationtype in annottypes:
        annot_out = IOtools.ensure_dir(os.path.join(outrootpath, annotationtype))

        for setsize in setsizes:
            size_out = IOtools.ensure_dir(os.path.join(annot_out, setsize))

            datasetspath = metacorpus.get_datasets_path(annotationtype, setsize)
            labelspath = metacorpus.get_labels_path(annotationtype, setsize)
            nclasses = IOtools.getfoldernames_of_dir(labelspath)
            combfilenames = IOtools.getfilenames_of_dir(datasetspath)[:numofcombs]

            for combfile in combfilenames:
                Xpath = os.path.join(datasetspath, combfile + ".csv")
                comb_out = IOtools.ensure_dir(os.path.join(size_out, combfile))

                for nclass in nclasses:
                    nclabelspath = os.path.join(labelspath, nclass)
                    # The class count is the last separator-delimited piece of
                    # the label folder name.
                    nc = int(nclass.split(metaexperimentation.intrafeatsep)[-1])
                    class_out = IOtools.ensure_dir(os.path.join(comb_out, nclass))

                    for taggertype in taggertypes:
                        rootscorespath = IOtools.ensure_dir(os.path.join(class_out, taggertype))
                        metaexperimentation.initialize_score_file(rootscorespath)
                        ylabelspath = os.path.join(nclabelspath, taggertype + ".csv")

                        for model in models:
                            model.prepare_experiment(Xpath, ylabelspath, rootscorespath,
                                                     labelnames=None, normalize=normalize)
                            model.apply_algorithms(nc)
示例11: print_accuracy_ablation
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import getfoldernames_of_dir [as 别名]
def print_accuracy_ablation(
        ablationCVscoresroot="/home/dicle/Dicle/Tez/corpusstats/learning11/ablation2/",
        ablationtypes=("item", "group", "onedim"),
        inscorescsv="fold_stats-ACCURACY.csv"):
    """Print rounded accuracy mean and std of one fixed setup per ablation run.

    For every ``<root>/<ablationtype>/scores/<excname>`` folder the fold
    statistics CSV is read and, for each label union, the row whose first
    column equals the composed row name is selected; its "accround" and
    "stdround" columns are printed, together with some debugging output
    about the row-name comparison.

    All parameters default to the values that used to be hard-coded in the
    body, so calling the function without arguments behaves as before.

    NOTE(review): indentation was lost in the listing; the position of the
    two trailing blank-line prints is inferred -- confirm against the
    original repository.

    ablationCVscoresroot -- root folder of the ablation experiments
    ablationtypes        -- ablation sub-folders to visit
    inscorescsv          -- file name of the per-run fold statistics
    """
    # Pieces of the row label to look up; they do not change between runs.
    featset = "redef-rat_lex-rat ** comb975_F_0-0_1-1_2-1_3-3_4-0_5-1_6-1_7-0_8-3"
    annottype = "(double"
    alg = "_MT-classification_alg-SVC_k-rbf_C-1)"
    lunions = ["EACHobj-EACHsubj", "ALLobj-ALLsubj", "ALLobj-STGsubj",
               "STGobj-ALLsubj", "STGobj-STGsubj", "WKobj-WKsubj"]

    for ablationtype in ablationtypes:
        print(ablationtype)
        scores_dir = os.path.join(ablationCVscoresroot, ablationtype, "scores")

        for excname in IOtools.getfoldernames_of_dir(scores_dir):
            print(excname)
            exc_dir = os.path.join(scores_dir, excname)
            accdf = IOtools.readcsv(os.path.join(exc_dir, inscorescsv), False)
            nrows = accdf.shape[0]

            for l in lunions:
                rowname = ", ".join([annottype, featset, l, alg])
                # str.decode exists on Python 2 byte strings only.
                rowname = rowname.strip().decode("utf8")

                # Debug output comparing the composed label with stored ones.
                print("%s %s %s %s" % ("q", rowname, "q 00 ", accdf.iloc[nrows - 2, 0]))
                print("%s %s %s" % (type(rowname), " 00 ", type(accdf.iloc[5, 0])))
                print("%s %s %s" % (len(rowname), " 00 ", len(accdf.iloc[5, 0])))
                print(rowname == accdf.iloc[nrows - 2, 0])

                xdf = accdf[accdf.iloc[:, 0] == rowname]
                print(l)
                # Spacing reproduces the Python 2 print statement, which adds
                # no separator after an item ending in a tab.
                print("\t%s \t%s" % (xdf.loc[:, "accround"], xdf.loc[:, "stdround"]))
            print("")
        print("\n\n")
示例12: crawl_folds_for_sets
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import getfoldernames_of_dir [as 别名]
def crawl_folds_for_sets(foldpath, outrootpath):
    """Collect the train and test item ids of every fold.

    For each fold sub-folder of *foldpath*, "trainitems.csv" and
    "testitems.csv" are read and their index values are appended both to
    ``<outrootpath>/all-trainitems.txt`` / ``all-testitems.txt`` and to the
    returned lists.

    foldpath    -- folder containing one sub-folder per fold
    outrootpath -- folder receiving the two "all-*.txt" files

    Returns ``{"trainitems.csv": [...], "testitems.csv": [...]}``.
    """
    folds = IOtools.getfoldernames_of_dir(foldpath)

    trainitems_fname = "trainitems.csv"
    testitems_fname = "testitems.csv"
    items = {trainitems_fname: [], testitems_fname: []}

    for fold in folds:
        fold_dir = os.path.join(foldpath, fold)
        for fname in items:
            frame = IOtools.readcsv(os.path.join(fold_dir, fname), keepindex=True)
            fileids = frame.index.values.tolist()

            # "xyz.csv" -> "all-xyz.txt"
            outpath = os.path.join(outrootpath, "all-" + fname[:-4] + ".txt")
            IOtools.todisc_list(outpath, fileids, mode='a')
            items[fname].extend(fileids)

    return items
示例13: label_counts_per_split
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import getfoldernames_of_dir [as 别名]
def label_counts_per_split(rootpath, mix=False, scount=0):
    """Print the label distribution of the train and test part of every label set.

    For each annotation type / agreement type / label union below
    ``rootpath/<annottype>/labels/<agrtype>`` the "answer" column of
    ``labels.csv`` is split into a leading train part and a trailing test
    part of ``utils.get_ntest(len(labels))`` items, and both parts are passed
    to ``print_label_count``.

    rootpath -- root folder of the annotation-type folders
    mix      -- if true, rows are shuffled first and the shuffled labels are
                also written below ``shuffledpath + str(scount)``
    scount   -- shuffle run number, used in the output folder name
    """
    annotagrtype = {"double": ["fullagr", "halfagr"], "single": ["halfagr"]}
    print("%s %s" % ("SCOUNT ", [scount] * 10))

    for annottype, agrtypes in annotagrtype.items():
        print("%s %s" % (annottype, " --- >"))

        for agrtype in agrtypes:
            agr_dir = os.path.join(rootpath, annottype, "labels", agrtype)

            for lunion in IOtools.getfoldernames_of_dir(agr_dir):
                print("%s %s" % (lunion, " :::::: "))
                ldf = IOtools.readcsv(os.path.join(agr_dir, lunion, "labels.csv"), True)
                labels = ldf["answer"].values.tolist()

                if mix:
                    ids = ldf.index.values.tolist()
                    np.random.shuffle(ids)
                    labels = ldf.loc[ids, "answer"].values.tolist()
                    shuffledldf = pd.DataFrame(labels, index=ids, columns=["answer"])
                    mixdir = IOtools.ensure_dir(
                        os.path.join(shuffledpath + str(scount), annottype, agrtype, lunion))
                    IOtools.tocsv(shuffledldf, os.path.join(mixdir, "labels.csv"),
                                  keepindex=True)

                ntest = utils.get_ntest(len(labels))
                # Split at an explicit index: slicing with -ntest inverts the
                # split when ntest is 0 (labels[:-0] is empty and labels[-0:]
                # is the whole list).
                split = max(len(labels) - ntest, 0)
                ltrain = labels[:split]
                ltest = labels[split:]

                print("TRAIN ----")
                print_label_count(ltrain)
                print("TEST -----")
                print_label_count(ltest)
                print("------------")
            print("--------------------------")
示例14: recordnewsmetadata_crawltxt
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import getfoldernames_of_dir [as 别名]
def recordnewsmetadata_crawltxt(corpuspath=metacorpus.rawcorpuspath, resourcefolders=metacorpus.resources, csvfilepath=_metafilepath):
    """Append one metadata line per corpus text file to *csvfilepath*.

    Walks ``corpuspath/<resource>/<category>/<file>``; for every file the
    line returned by ``getmetadata_fromtxt`` is appended to the output file.
    A progress message is printed after each category.

    corpuspath      -- corpus root folder
    resourcefolders -- resource folder names to visit
    csvfilepath     -- output file, opened in append mode for every line
    """
    for resource in resourcefolders:
        # ensure_dir is applied to the source folders themselves here.
        resource_dir = IOtools.ensure_dir(os.path.join(corpuspath, resource))

        for cat in IOtools.getfoldernames_of_dir(resource_dir):
            cat_dir = IOtools.ensure_dir(os.path.join(resource_dir, cat))

            for filename in IOtools.getfilenames_of_dir(cat_dir, removeextension=False):
                metadataline = getmetadata_fromtxt(cat_dir + os.sep + filename)
                IOtools.todisc_txt(metadataline, csvfilepath, mode="a")

            print("finished " + resource + "/" + cat)
示例15: crawlandmakexmlcorpus
# 需要导入模块: from sentimentfinding import IOtools [as 别名]
# 或者: from sentimentfinding.IOtools import getfoldernames_of_dir [as 别名]
def crawlandmakexmlcorpus():
    """Wrap every raw text file in the XML head/foot and store it in the XML corpus.

    Walks ``rawcorpuspath/<resource>/<category>`` for the module-level
    ``resourcefolders`` and mirrors the folder hierarchy under
    ``xmlcorpuspath``.  Each file ``<name><fromextension>`` is read and
    written as ``<name><toextension>`` with ``headxml`` and ``footxml``
    placed around its content, separated by newlines.
    """
    for resource in resourcefolders:
        raw_resource_dir = os.path.join(rawcorpuspath, resource)
        # Replicate the folder hierarchy inside the XML corpus.
        xml_resource_dir = IOtools.ensure_dir(os.path.join(xmlcorpuspath, resource))

        for cat in IOtools.getfoldernames_of_dir(raw_resource_dir):
            raw_cat_dir = os.path.join(raw_resource_dir, cat)
            xml_cat_dir = IOtools.ensure_dir(os.path.join(xml_resource_dir, cat))

            for filename in IOtools.getfilenames_of_dir(raw_cat_dir, removeextension=True):
                txtpath = raw_cat_dir + os.sep + filename + fromextension
                xmlpath = xml_cat_dir + os.sep + filename + toextension

                txtcontent = IOtools.readtxtfile(txtpath)
                xmlcontent = "\n".join([headxml, txtcontent, footxml])
                IOtools.todisc_txt(xmlcontent, xmlpath)