本文整理汇总了Python中SonicScrewdriver.add_dicts方法的典型用法代码示例。如果您正苦于以下问题:Python SonicScrewdriver.add_dicts方法的具体用法?Python SonicScrewdriver.add_dicts怎么用?Python SonicScrewdriver.add_dicts使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块SonicScrewdriver的用法示例。
在下文中一共展示了SonicScrewdriver.add_dicts方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: select_common_features
# 需要导入模块: import SonicScrewdriver as utils (下方示例代码通过别名 utils 调用该模块)
# 或者: from SonicScrewdriver import add_dicts [as 别名]
def select_common_features(trainingset, n):
    """Return the n most frequent features across the training set.

    A deliberately simple feature-selection strategy: the raw counts of
    every volume are summed and the features with the largest totals win.

    Args:
        trainingset: iterable of volume objects, each exposing a
            ``rawcounts`` mapping that ``utils.add_dicts`` can merge.
        n: number of features requested. If fewer are available, all of
            them are returned and a notice is printed.

    Returns:
        A list of features, in the order ``utils.sortkeysbyvalue``
        produced them (sorted with ``whethertoreverse=True``).
    """
    totals = {}
    for volume in trainingset:
        # Accumulate this volume's raw counts into the running totals.
        utils.add_dicts(volume.rawcounts, totals)

    # sortkeysbyvalue hands back (frequency, word) 2-tuples.
    ranked = utils.sortkeysbyvalue(totals, whethertoreverse=True)

    available = len(ranked)
    if n > available:
        n = available
        print("We only have " + str(n) + " features.")

    # Keep only the word (second element) of the first n pairs.
    return [pair[1] for pair in ranked[:n]]
示例2: main
# 需要导入模块: import SonicScrewdriver as utils (下方示例代码通过别名 utils 调用该模块)
# 或者: from SonicScrewdriver import add_dicts [as 别名]
def main(listofmodels=None):
    """Combine the page-level genre predictions of several models.

    Every ``.predict`` file in the first model's output folder that also
    has a ground-truth file is processed. For each such file the matching
    prediction file of every model is read, the smoothed and rough
    predictions of each page are collected, the per-page probabilities are
    summed across models with ``utils.add_dicts``, and ``resolve_voting``
    then picks a winner and a runner-up for each page.

    Args:
        listofmodels: names of the model output folders located under the
            prediction root. The first entry decides which files are
            considered. When omitted or ``None`` the nine-model ensemble
            listed below is used.

    Returns:
        A 5-tuple ``(consensus, secondthoughts, pageprobsforfile,
        dissentsequences, groundtruths)``. Each element is a dict keyed by
        volume id (the ground-truth filename minus its last four
        characters), holding respectively: the winning label per page, the
        runner-up per page, the per-page dicts of summed probabilities,
        the per-page dissent values returned by ``resolve_voting``, and
        the value returned by ``get_ground_truth``.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if listofmodels is None:
        listofmodels = ["newfeatures6", "newfeatures2", "newfeatures3",
                        "newfeatures4", "newfeatures9", "forest", "bycallno",
                        "forest4", "forest7"]

    genretranslations = {
        'subsc': 'front', 'argum': 'non', 'pref': 'non', 'aut': 'bio',
        'bio': 'bio', 'toc': 'front', 'title': 'front', 'bookp': 'front',
        'bibli': 'back', 'gloss': 'back', 'epi': 'fic', 'errat': 'non',
        'notes': 'non', 'ora': 'non', 'let': 'bio', 'trv': 'non',
        'lyr': 'poe', 'nar': 'poe', 'vdr': 'dra', 'pdr': 'dra',
        'clo': 'dra', 'impri': 'front', 'libra': 'back', 'index': 'back',
    }

    predictroot = "/Volumes/TARDIS/output/"
    firstdir = predictroot + listofmodels[0] + "/"
    validfiles = [name for name in os.listdir(firstdir)
                  if name.endswith(".predict")]

    groundtruthdir = "/Users/tunder/Dropbox/pagedata/newfeatures/genremaps/"
    # A set gives constant-time membership tests in the loop below.
    groundtruthfiles = set(os.listdir(groundtruthdir))

    groundtruths = dict()
    htidtable = dict()
    for filename in validfiles:
        gt = get_ground_truth_file(filename)
        if gt not in groundtruthfiles:
            continue
        # The volume id is the ground-truth filename minus its last four
        # characters.
        htid = gt[0:-4]
        htidtable[filename] = htid
        groundtruths[htid] = get_ground_truth(gt, groundtruthdir, genretranslations)

    dissensus = dict()
    pageprobsforfile = dict()
    # Iterate only over files that received a volume id above. Looking up
    # every entry of validfiles in htidtable raised KeyError for any file
    # that had no ground truth.
    for filename, htid in htidtable.items():
        versions = list()
        pageprobs = list()
        for model in listofmodels:
            thispath = predictroot + model + "/" + filename
            try:
                with open(thispath, encoding="utf-8") as f:
                    filelines = f.readlines()

                if len(pageprobs) < len(filelines):
                    # Initialize page probabilities to correct length.
                    if len(pageprobs) > 0:
                        print("Initializing more than once. Error condition.")
                    pageprobs.extend(dict() for _ in filelines)

                smoothlist = list()
                roughlist = list()
                for i, line in enumerate(filelines):
                    fields = line.rstrip().split('\t')
                    rough = fields[1]
                    smoothed = fields[2]
                    smoothlist.append(smoothed)
                    roughlist.append(rough)
                    if len(fields) > 5:
                        probdict = interpret_probabilities(fields[5:])
                        # Add this model's probabilities for the page to the
                        # record of per-page probabilities.
                        utils.add_dicts(probdict, pageprobs[i])

                versions.append(smoothlist)
                versions.append(roughlist)
            except Exception as err:
                # Still best-effort: a model whose file is missing or
                # malformed is left out of the vote. Unlike a bare except,
                # this lets KeyboardInterrupt/SystemExit propagate and
                # reports what was skipped.
                print("Skipping " + thispath + ": " + repr(err))

        pageprobsforfile[htid] = pageprobs
        # Transpose: one tuple per page holding every model's predictions.
        dissensus[htid] = list(zip(*versions))

    consensus = dict()
    secondthoughts = dict()
    dissentsequences = dict()
    for htid, pagelist in dissensus.items():
        winners = list()
        runnersup = list()
        dissentseq = list()
        pageprobs = pageprobsforfile[htid]
        for i, page in enumerate(pagelist):
            floatwinner = maxkey(pageprobs[i])
            winner, dissent, runnerup = resolve_voting(page, floatwinner)
            winners.append(winner)
            runnersup.append(runnerup)
            dissentseq.append(dissent)
        consensus[htid] = winners
        secondthoughts[htid] = runnersup
        dissentsequences[htid] = dissentseq

    return consensus, secondthoughts, pageprobsforfile, dissentsequences, groundtruths
示例3: list
# 需要导入模块: import SonicScrewdriver as utils (下方示例代码通过别名 utils 调用该模块)
# 或者: from SonicScrewdriver import add_dicts [as 别名]
smoothlist = list()
roughlist = list()
for i in range(len(filelines)):
line = filelines[i]
line = line.rstrip()
fields = line.split('\t')
rough = fields[1]
smoothed = fields[2]
smoothlist.append(smoothed)
roughlist.append(rough)
if len(fields) > 5:
probdict = interpret_probabilities(fields[5:])
# probdict = normalize(probdict)
# make them all sum to 1
utils.add_dicts(probdict, pageprobs[i])
# This will add all the probabilities for this page to the
# record of per-page probabilities.
versions.append(smoothlist)
versions.append(roughlist)
except:
pass
pageprobsforfile[filename] = pageprobs
dissensus[filename] = [x for x in zip(*versions)]
def maxkey(dictionary):
tuplelist = utils.sortkeysbyvalue(dictionary, whethertoreverse = True)
winner = tuplelist[0][1]