本文整理汇总了Python中SonicScrewdriver.sortkeysbyvalue函数的典型用法代码示例。如果您正苦于以下问题:Python SonicScrewdriver.sortkeysbyvalue函数的具体用法?Python SonicScrewdriver.sortkeysbyvalue怎么用?Python SonicScrewdriver.sortkeysbyvalue使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。您也可以进一步了解该函数所在模块SonicScrewdriver的用法示例。
在下文中一共展示了SonicScrewdriver.sortkeysbyvalue方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: sequence_to_counts
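# Required import (the code below calls the module through the `utils` alias): import SonicScrewdriver as utils
# Alternative: from SonicScrewdriver import sortkeysbyvalue  (then call it without the `utils.` prefix)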
def sequence_to_counts(genresequence):
    """Tally page-level genre predictions and name the dominant genre.

    Each page in ``genresequence`` contributes one vote to its own genre.
    A page labelled ``'bio'`` additionally contributes one vote to ``'non'``,
    so every biography vote is also a nonfiction vote.

    Returns a 2-tuple ``(counts, top_genre)``: the dictionary of per-genre
    vote totals, and the genre ranked first by ``utils.sortkeysbyvalue``.
    This function never reports ``'bio'`` as the top genre; if biography
    ranks first, ``'non'`` is reported instead.

    An empty ``genresequence`` is not handled specially: indexing the first
    ranked entry is attempted regardless.
    """
    tally = {}
    for label in genresequence:
        utils.addtodict(label, 1, tally)
        if label == 'bio':
            # Biography pages are double-counted as nonfiction on purpose.
            utils.addtodict('non', 1, tally)

    # The ranking is a list of tuples whose second element is the genre key;
    # with whethertoreverse=True the highest count is expected to come first.
    ranked = utils.sortkeysbyvalue(tally, whethertoreverse=True)
    top = ranked[0][1]

    return tally, ('non' if top == 'bio' else top)
示例2: select_common_features
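# Required import (the code below calls the module through the `utils` alias): import SonicScrewdriver as utils
# Alternative: from SonicScrewdriver import sortkeysbyvalue  (then call it without the `utils.` prefix)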
def select_common_features(trainingset, n):
    """Return the ``n`` highest-ranked features across the training set.

    The ``rawcounts`` dictionary of every volume in ``trainingset`` is merged
    into one master dictionary via ``utils.add_dicts``; the merged entries are
    then ranked with ``utils.sortkeysbyvalue`` and the keys of the first ``n``
    entries are returned as a list.

    If fewer than ``n`` features exist, ``n`` is reduced to the number
    available and a notice is printed.

    This is a deliberately simple frequency-based selection strategy.
    """
    pooled = {}
    for volume in trainingset:
        # Accumulate this volume's raw counts into the master dictionary.
        utils.add_dicts(volume.rawcounts, pooled)

    # A list of (frequency, word) pairs, highest frequency first.
    ranked = utils.sortkeysbyvalue(pooled, whethertoreverse=True)

    available = len(ranked)
    if n > available:
        n = available
        print("We only have " + str(n) + " features.")

    # Keep only the word from each of the leading n pairs.
    return [pair[1] for pair in ranked[:n]]
示例3: sequence_to_counts
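# Required import (the code below calls the module through the `utils` alias): import SonicScrewdriver as utils
# Alternative: from SonicScrewdriver import sortkeysbyvalue  (then call it without the `utils.` prefix)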
def sequence_to_counts(genresequence):
    """Tally page-level genre predictions and name the dominant genre.

    The tally always contains the keys ``'fic'``, ``'poe'``, ``'dra'`` and
    ``'non'`` (starting at zero). Pages labelled ``"bio"``, ``"index"``,
    ``"back"`` or ``"trv"`` are counted as ``"non"``; any other label is
    counted under its own name.

    Returns a 2-tuple ``(counts, top_genre)``: the dictionary of vote totals
    and the genre ranked first by ``utils.sortkeysbyvalue``.
    """
    tally = {'fic': 0, 'poe': 0, 'dra': 0, 'non': 0}

    for page in genresequence:
        # Biography, indexes, back matter and "trv" pages all count as
        # nonfiction for this purpose.
        bucket = "non" if page in ("bio", "index", "back", "trv") else page
        utils.addtodict(bucket, 1, tally)

    # Rank the genres by count and report the key of the leading entry.
    ranked = utils.sortkeysbyvalue(tally, whethertoreverse=True)
    return tally, ranked[0][1]
示例4: resolve_voting
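# Required import (the code below calls the module through the `utils` alias): import SonicScrewdriver as utils
# Alternative: from SonicScrewdriver import sortkeysbyvalue  (then call it without the `utils.` prefix)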
def resolve_voting(votes, tiebreaker):
    """Pick a winner from a collection of votes, breaking ties if needed.

    Votes are tallied under their own labels (no relabelling is applied) and
    ranked with ``utils.sortkeysbyvalue``. Only the two top-ranked candidates
    are ever considered.

    Returns a 3-tuple ``(winner, dissent, runnerup)`` where ``dissent`` is the
    fraction of the electorate that did not vote for the top-ranked candidate.

    * One candidate only: it is both winner and runner-up.
    * Top candidate strictly ahead of the second: top wins, second is
      runner-up.
    * Otherwise (a tie between the top two): if ``tiebreaker`` equals one of
      them, that one wins; if it matches neither, the winner is chosen at
      random between the two. A message is printed in every tie case.
    """
    electorate = len(votes)

    tally = {}
    for ballot in votes:
        utils.addtodict(ballot, 1, tally)

    # Entries are (count, label) tuples, highest count first.
    ranked = utils.sortkeysbyvalue(tally, whethertoreverse=True)
    topcount, leader = ranked[0][0], ranked[0][1]
    dissent = (electorate - topcount) / electorate

    if len(ranked) < 2:
        # A single candidate received every vote.
        return leader, dissent, leader

    secondcount, challenger = ranked[1][0], ranked[1][1]
    if topcount > secondcount:
        # Clear plurality for the leader.
        return leader, dissent, challenger

    # From here on the top two candidates are tied.
    if tiebreaker == leader:
        print("Tiebreaker " + tiebreaker)
        return leader, dissent, challenger
    if tiebreaker == challenger:
        print("Tiebreaker " + tiebreaker)
        return challenger, dissent, leader

    # The tiebreaker favours neither of the tied pair: choose randomly.
    print("Tie in spite of " + tiebreaker)
    win = random.choice([leader, challenger])
    runnerup = challenger if win == leader else leader
    return win, dissent, runnerup
示例5: maxkey
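# Required import (the code below calls the module through the `utils` alias): import SonicScrewdriver as utils
# Alternative: from SonicScrewdriver import sortkeysbyvalue  (then call it without the `utils.` prefix)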
def maxkey(dictionary):
    """Return the key ranked first by ``utils.sortkeysbyvalue``.

    The dictionary is ranked with ``whethertoreverse=True`` and the second
    element (the key) of the leading tuple is returned unchanged; in
    particular a ``"bio"`` winner is not relabelled.
    """
    ranked = utils.sortkeysbyvalue(dictionary, whethertoreverse=True)
    return ranked[0][1]
示例6: censor
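# Required import (the code below calls the module through the `utils` alias): import SonicScrewdriver as utils
# Alternative: from SonicScrewdriver import sortkeysbyvalue  (then call it without the `utils.` prefix)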
def censor(htid, genresequence):
htid = utils.pairtreelabel(htid)
# convert the htid into a dirty pairtree label for metadata matching
# Create a dictionary with entries for all possible conditions, initially set negative.
symptoms = ["weakconfirmation", "weakdenial", "strongconfirmation", "strongdenial", "modelagrees", "modeldisagrees"]
reported = dict()
for symptom in symptoms:
reported[symptom] = 0
couldbefiction = True
# Now we need to assess the largest genre in this volume.
genrecounts = dict()
genrecounts['fic'] = 0
genrecounts['poe'] = 0
genrecounts['dra'] = 0
genrecounts['non'] = 0
for page in genresequence:
indexas = page
# For this purpose, we treat biography and indexes as equivalent to nonfiction.
if page == "bio" or page == "index" or page == "back":
indexas = "non"
utils.addtodict(indexas, 1, genrecounts)
# Convert the dictionary of counts into a sorted list, and take the max.
genretuples = utils.sortkeysbyvalue(genrecounts, whethertoreverse = True)
maxgenre = genretuples[0][1]
if htid not in rowindices and htid not in modelindices:
return genresequence, reported
if htid in rowindices:
genrestring = metadata["genres"][htid]
genreinfo = genrestring.split(";")
# It's a semicolon-delimited list of items.
for info in genreinfo:
if info == "Biography" or info == "Autobiography":
couldbefiction = False
if info == "biog?" and maxgenre == "non":
reported["weakconfirmation"] = 1
if info == "biog?" and maxgenre != "non":
reported["weakdenial"] = 1
if info == "Not fiction" and maxgenre == "non":
reported["weakconfirmation"] = 1
if info == "Not fiction" and maxgenre == "fic":
reported["weakdenial"] = 1
if (info == "Fiction" or info == "Novel") and maxgenre == "fic":
reported["strongconfirmation"] = 1
if (info == "Fiction" or info == "Novel") and maxgenre != "fic":
reported["strongdenial"] = 1
if info == "Biography" and maxgenre == "non":
reported["strongconfirmation"] = 1
if info == "Biography" and maxgenre != "non":
reported["strongdenial"] = 1
if info == "Autobiography" and maxgenre == "non":
reported["strongconfirmation"] = 1
if info == "Autobiography" and maxgenre != "non":
reported["strongdenial"] = 1
if (info == "Poetry" or info == "Poems") and maxgenre == "poe":
reported["strongconfirmation"] = 1
if (info == "Poetry" or info == "Poems") and maxgenre != "poe":
reported["strongdenial"] = 1
if (info == "Drama" or info == "Tragedies" or info == "Comedies") and maxgenre == "dra":
reported["strongconfirmation"] = 1
if (info == "Drama" or info == "Tragedies" or info == "Comedies") and maxgenre != "dra":
reported["strongdenial"] = 1
if (info == "Catalog" or info == "Dictionary" or info=="Bibliographies") and maxgenre == "non":
reported["strongconfirmation"] = 1
couldbefiction = False
if (info == "Catalog" or info == "Dictionary" or info=="Bibliographies") and maxgenre != "non":
reported["strongdenial"] = 1
else:
print("Skipped.")
if htid in modelindices:
modelpredictions = dict()
for genre, genrecolumn in modeldata.items():
if not genre in options:
# this column is not a genre!
continue
modelpredictions[genre] = float(genrecolumn[htid])
predictionlist = utils.sortkeysbyvalue(modelpredictions, whethertoreverse = True)
modelprediction = predictionlist[0][1]
#.........这里部分代码省略.........
示例7: metadata_check
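# Required import (the code below calls the module through the `utils` alias): import SonicScrewdriver as utils
# Alternative: from SonicScrewdriver import sortkeysbyvalue  (then call it without the `utils.` prefix)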
def metadata_check(htid, inputsequence):
global options, rowindices, columns, metadata, modelindices, modelcolumns, modeldata
'''Assesses whether previous metadata tend to deny or confirm the
thrust of page-level genre predictions. For this purpose we use both
genre codes extracted from the MARC record and the predictions of a volume-
level probabilistic model.
Returns two parameters: 1) a dictionary of "confirmations" that indicate
whether metadata aligns with page-level predictions in six specific ways.
2) The "maxgenre" or genre most commonly predicted at the page level.'''
genresequence = [x for x in inputsequence]
# make a defensive copy of incoming parameter
htid = utils.pairtreelabel(htid)
# convert the htid into a dirty pairtree label for metadata matching
# Create a dictionary with entries for all possible conditions, initially set negative.
symptoms = ["weakconfirmation", "weakdenial", "strongconfirmation", "strongdenial", "modelagrees", "modeldisagrees"]
# The first four of these symptoms reflect metadata extracted from the MARC record. Weakconfirmation and
# weakdenial are based on flags extracted from controlfield 008 which I find are not very reliable as guides.
# Strongconfirmation and strongdenial are based on strings extracted from other fields that are more
# specific and reliable as indications of genre. Modelagrees and modeldisagrees reflect the alignment of
# page-level predictions with an earlier volume-level model of the corpus.
confirmations = dict()
for symptom in symptoms:
confirmations[symptom] = 0
genrecounts, maxgenre = sequence_to_counts(genresequence)
if htid not in rowindices and htid not in modelindices:
return confirmations
if htid in rowindices:
genrestring = metadata["genres"][htid]
genreinfo = genrestring.split(";")
# It's a semicolon-delimited list of items.
for info in genreinfo:
# if info == "biog?" and maxgenre == "non":
# confirmations["weakconfirmation"] = 1
# if info == "biog?" and maxgenre != "non":
# confirmations["weakdenial"] = 1
if info == "Not fiction" and maxgenre == "non":
confirmations["weakconfirmation"] = 1
if info == "Not fiction" and maxgenre == "fic":
confirmations["weakdenial"] = 1
if (info == "Fiction" or info == "Novel") and maxgenre == "fic":
confirmations["strongconfirmation"] = 1
if (info == "Fiction" or info == "Novel") and maxgenre != "fic":
confirmations["strongdenial"] = 1
if info == "Biography" and maxgenre == "non":
confirmations["strongconfirmation"] = 1
if info == "Biography" and maxgenre != "non":
confirmations["strongdenial"] = 1
if info == "Autobiography" and maxgenre == "non":
confirmations["strongconfirmation"] = 1
if info == "Autobiography" and maxgenre != "non":
confirmations["strongdenial"] = 1
if (info == "Poetry" or info == "Poems") and maxgenre == "poe":
confirmations["strongconfirmation"] = 1
if (info == "Poetry" or info == "Poems") and maxgenre != "poe":
confirmations["strongdenial"] = 1
if (info == "Drama" or info == "Tragedies" or info == "Comedies") and maxgenre == "dra":
confirmations["strongconfirmation"] = 1
if (info == "Drama" or info == "Tragedies" or info == "Comedies") and maxgenre != "dra":
confirmations["strongdenial"] = 1
if (info == "Catalog" or info == "Dictionary" or info=="Bibliographies") and maxgenre == "non":
confirmations["strongconfirmation"] = 1
couldbefiction = False
if (info == "Catalog" or info == "Dictionary" or info=="Bibliographies") and maxgenre != "non":
confirmations["strongdenial"] = 1
else:
print("Skipped.")
if htid in modelindices:
modelpredictions = dict()
for genre, genrecolumn in modeldata.items():
if not genre in options:
# this column is not a genre!
continue
modelpredictions[genre] = float(genrecolumn[htid])
predictionlist = utils.sortkeysbyvalue(modelpredictions, whethertoreverse = True)
modelprediction = predictionlist[0][1]
modelconfidence = predictionlist[0][0]
nextclosest = predictionlist[1][0]
# Take the top prediction.
# For purposes of this routine, treat biography as nonfiction:
#.........这里部分代码省略.........