当前位置: 首页>>代码示例>>Python>>正文


Python SonicScrewdriver.add_dicts方法代码示例

本文整理汇总了Python中SonicScrewdriver.add_dicts方法的典型用法代码示例。如果您正苦于以下问题:Python SonicScrewdriver.add_dicts方法的具体用法?Python SonicScrewdriver.add_dicts怎么用?Python SonicScrewdriver.add_dicts使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在SonicScrewdriver的用法示例。


在下文中一共展示了SonicScrewdriver.add_dicts方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: select_common_features

# 需要导入模块: import SonicScrewdriver as utils (下面的示例代码通过别名 utils 调用该模块)
# 或者: from SonicScrewdriver import add_dicts (此时需把代码中的 utils.add_dicts 改为 add_dicts)
def select_common_features(trainingset, n):
	"""Return the n most frequent features found in the training set.

	Every volume's ``rawcounts`` dictionary is folded into one running
	total with ``utils.add_dicts``; the features are then ranked by that
	total and the leading ``n`` are returned. This is deliberately naive
	feature selection, but it is often good enough.

	If fewer than ``n`` features exist, all of them are returned and a
	message reporting the actual number is printed.
	"""
	totals = dict()
	for volume in trainingset:
		# Accumulate this volume's raw counts into the master dictionary.
		utils.add_dicts(volume.rawcounts, totals)

	# A list of 2-tuple (frequency, word) pairs, most frequent first.
	ranked = utils.sortkeysbyvalue(totals, whethertoreverse = True)

	available = len(ranked)
	if n > available:
		n = available
		print("We only have " + str(n) + " features.")

	# Keep only the word (second element) of each of the first n pairs.
	return [pair[1] for pair in ranked[:n]]
开发者ID:tedunderwood,项目名称:GenreProject,代码行数:27,代码来源:logistic.py

示例2: main

# 需要导入模块: import SonicScrewdriver as utils (下面的示例代码通过别名 utils 调用该模块)
# 或者: from SonicScrewdriver import add_dicts (此时需把代码中的 utils.add_dicts 改为 add_dicts)
def main(listofmodels = ("newfeatures6", "newfeatures2", "newfeatures3", "newfeatures4", "newfeatures9", "forest", "bycallno", "forest4", "forest7")):
    """Combine the page-level genre predictions of several models.

    The ``.predict`` files found in the first model's output directory
    define the set of volumes. Only volumes that also have a ground-truth
    file are processed. For each such volume, the same-named prediction
    file is read from every model's directory; each line is tab-separated,
    with the rough prediction in field 1, the smoothed prediction in
    field 2, and (when present) probability fields from index 5 onward.

    Parameters:
        listofmodels: sequence of model directory names under the
            prediction root. The first entry decides which files are
            considered. The default is an immutable tuple so it cannot be
            altered between calls.

    Returns:
        A 5-tuple ``(consensus, secondthoughts, pageprobsforfile,
        dissentsequences, groundtruths)``. Each is a dict keyed by htid
        (the ground-truth filename minus its last four characters):
        per-page winners, per-page runners-up, per-page summed
        probability dicts, per-page dissent values, and the ground truth
        as returned by ``get_ground_truth``.
    """

    genretranslations = {'subsc' : 'front', 'argum': 'non', 'pref': 'non', 'aut': 'bio', 'bio': 'bio', 'toc': 'front', 'title': 'front', 'bookp': 'front', 'bibli': 'back', 'gloss': 'back', 'epi': 'fic', 'errat': 'non', 'notes': 'non', 'ora': 'non', 'let': 'bio', 'trv': 'non', 'lyr': 'poe', 'nar': 'poe', 'vdr': 'dra', 'pdr': 'dra', 'clo': 'dra', 'impri': 'front', 'libra': 'back', 'index': 'back'}

    predictroot = "/Volumes/TARDIS/output/"
    firstdir = predictroot + listofmodels[0] + "/"
    validfiles = [name for name in os.listdir(firstdir) if name.endswith(".predict")]

    groundtruthdir = "/Users/tunder/Dropbox/pagedata/newfeatures/genremaps/"

    # A set makes the per-file membership test below constant-time.
    groundtruthfiles = set(os.listdir(groundtruthdir))

    groundtruths = dict()
    htidtable = dict()
    for filename in validfiles:
        gt = get_ground_truth_file(filename)
        if gt not in groundtruthfiles:
            # No ground truth for this volume; it is left out of htidtable
            # and therefore out of everything that follows.
            continue
        htid = gt[0:-4]
        htidtable[filename] = htid
        groundtruths[htid] = get_ground_truth(gt, groundtruthdir, genretranslations)

    dissensus = dict()
    pageprobsforfile = dict()

    # Iterate only over files that received an htid above. Looping over
    # every valid file would raise KeyError for volumes lacking ground truth.
    for filename, htid in htidtable.items():
        versions = list()
        pageprobs = list()
        for model in listofmodels:
            try:
                thispath = predictroot + model + "/" + filename
                with open(thispath, encoding="utf-8") as f:
                    filelines = f.readlines()

                shortfall = len(filelines) - len(pageprobs)
                if shortfall > 0:
                    # Grow page probabilities to the correct length.
                    if pageprobs:
                        print("Initializing more than once. Error condition.")
                    # Add only the missing entries so the list never
                    # overshoots the number of pages.
                    pageprobs.extend(dict() for _ in range(shortfall))

                smoothlist = list()
                roughlist = list()
                for i, line in enumerate(filelines):
                    fields = line.rstrip().split('\t')
                    rough = fields[1]
                    smoothed = fields[2]
                    smoothlist.append(smoothed)
                    roughlist.append(rough)
                    if len(fields) > 5:
                        probdict = interpret_probabilities(fields[5:])
                        utils.add_dicts(probdict, pageprobs[i])
                        # This adds all the probabilities for this page to
                        # the record of per-page probabilities.

                versions.append(smoothlist)
                versions.append(roughlist)
            except Exception as err:
                # Best effort: a model whose file is missing or malformed
                # contributes no votes. Unlike a bare except, this lets
                # KeyboardInterrupt/SystemExit propagate and leaves a trace.
                print("Skipping model " + str(model) + " for " + str(filename) + ": " + repr(err))
        pageprobsforfile[htid] = pageprobs

        # Transpose: one tuple of votes per page, truncated to the shortest
        # version list (empty when no model could be read).
        dissensus[htid] = list(zip(*versions))

    consensus = dict()
    secondthoughts = dict()
    dissentsequences = dict()

    for htid, pagelist in dissensus.items():
        winners = list()
        runnersup = list()
        dissentseq = list()
        pageprobs = pageprobsforfile[htid]
        for i, page in enumerate(pagelist):
            floatwinner = maxkey(pageprobs[i])
            winner, dissent, runnerup = resolve_voting(page, floatwinner)
            winners.append(winner)
            runnersup.append(runnerup)
            dissentseq.append(dissent)
        consensus[htid] = winners
        secondthoughts[htid] = runnersup
        dissentsequences[htid] = dissentseq

    return consensus, secondthoughts, pageprobsforfile, dissentsequences, groundtruths
开发者ID:tedunderwood,项目名称:HathiGenreTrainingset,代码行数:99,代码来源:EnsembleModule.py

示例3: list

# 需要导入模块: import SonicScrewdriver as utils (下面的示例代码通过别名 utils 调用该模块)
# 或者: from SonicScrewdriver import add_dicts (此时需把代码中的 utils.add_dicts 改为 add_dicts)
			smoothlist = list()
			roughlist = list()
			for i in range(len(filelines)):
				line = filelines[i]
				line = line.rstrip()
				fields = line.split('\t')
				rough = fields[1]
				smoothed = fields[2]
				smoothlist.append(smoothed)
				roughlist.append(rough)
				if len(fields) > 5:
					probdict = interpret_probabilities(fields[5:])
					# probdict = normalize(probdict)
					# make them all sum to 1
					utils.add_dicts(probdict, pageprobs[i])
					# This will add all the probabilities for this page to the
					# record of per-page probabilities.

			versions.append(smoothlist)
			versions.append(roughlist)

		except:
			pass
	pageprobsforfile[filename] = pageprobs

	dissensus[filename] = [x for x in zip(*versions)]

def maxkey(dictionary):
	''' Looks up the top-ranked entry of a dictionary, ranking its
	keys by value with utils.sortkeysbyvalue in reversed order.

	NOTE(review): the snippet is cut off after the assignment below,
	so as shown nothing is returned. Presumably the full function
	returns winner -- confirm against the original Ensemble.py.
	'''
	tuplelist = utils.sortkeysbyvalue(dictionary, whethertoreverse = True)
	# Takes the second element of the first tuple; presumably the tuples
	# are (value, key) pairs, so this is the key -- TODO confirm.
	# An empty dictionary presumably yields an empty list here, in which
	# case this index raises IndexError -- TODO confirm.
	winner = tuplelist[0][1]
开发者ID:tedunderwood,项目名称:HathiGenreTrainingset,代码行数:32,代码来源:Ensemble.py


注:本文中的SonicScrewdriver.add_dicts方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。