本文整理汇总了Python中fuzzywuzzy.process.extract方法的典型用法代码示例。如果您正苦于以下问题:Python process.extract方法的具体用法?Python process.extract怎么用?Python process.extract使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类fuzzywuzzy.process的用法示例。
在下文中一共展示了process.extract方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: search
# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def search(self, query):
    """Return twist.moe catalogue entries that fuzzily match ``query``.

    The full anime list is fetched from the twist.moe API. When the response
    text contains 'being redirected', a cookie is obtained through
    ``get_cookie`` and the request is issued once more with it. Every entry
    is wrapped in a ``SearchResult`` and the list is handed to
    ``process.extract``; only the matched objects (not the scores) are
    returned.
    """
    api_url = 'https://twist.moe/api/anime'
    headers = {
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.46 Safari/537.36',
        'x-access-token': '1rj2vRtegS8Y60B3w3qNZm5T2Q0TN2NR'
    }

    response = helpers.get(api_url, headers=headers)
    if 'being redirected' in response.text:
        logger.debug('Tring to extract cookie')
        cookie = get_cookie(response)
        logger.debug('Got cookie: ' + cookie)
        headers['cookie'] = cookie
        # XXX: helpers.get is cached, so it may not be suitable for this
        # retry. Investigate.
        response = helpers.get(api_url, headers=headers)

    candidates = [
        SearchResult(
            title=entry['title'],
            url='https://twist.moe/a/' + entry['slug']['slug'] + '/',
        )
        for entry in response.json()
    ]
    # process.extract yields (choice, score) pairs; keep the choice only.
    return [match[0] for match in process.extract(query, candidates)]
示例2: run
# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def run(self):
    """Fuzzy-search ``names`` for ``self.s`` and push the hits to the UI.

    The scorer is ``hooked_scorer`` bound to ``self.terminate_event``; a
    ``TerminateException`` raised during the search is swallowed so the
    search can be abandoned quietly. Whatever happens in the search,
    ``self.stop()`` is called and ``self.finished`` is emitted afterwards.
    """
    scorer = functools.partial(hooked_scorer, terminate_event=self.terminate_event)
    try:
        # f.iStr1.value won't change until Form.Execute() returns.
        matches = process.extract(self.s, names, limit=LISTLEN, scorer=scorer)
        labels = [match[0] for match in matches]
        # Pad with empty strings up to 10 entries.
        # NOTE(review): 10 is hard-coded while the search limit is LISTLEN --
        # confirm the two are meant to agree.
        labels.extend([""] * (10 - len(matches)))
        self.refresh_list.emit(*labels)  # call main Thread's UI function.
    except TerminateException:
        pass
    self.stop()
    self.finished.emit()
# --------------------------------------------------------------------------
示例3: _get_page
# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def _get_page(self, topic, request_options=None):
    """Build the "Unknown topic" answer with up to three suggested topics.

    Only topics of the same kind as ``topic`` are considered: names starting
    with ':' when ``topic`` starts with ':', all other names otherwise.
    ``request_options`` is accepted but not used here.
    """
    all_topics = self.get_topics_list()
    wants_colon = topic.startswith(':')
    candidates = [
        name for name in all_topics
        if name.startswith(':') == wants_colon
    ]

    if _USING_FUZZYWUZZY:
        suggestions = process.extract(topic, candidates, scorer=fuzz.ratio)[:3]
    else:
        suggestions = process.extract(topic, candidates, limit=3, scorer=fuzz.ratio)

    # Each suggestion is a (topic, score) pair that fills both %s slots.
    suggestion_lines = [" * %s %s" % suggestion for suggestion in suggestions]
    possible_topics_text = "\n".join(suggestion_lines)
    return """
Unknown topic.
Do you mean one of these topics maybe?
%s
""" % possible_topics_text
示例4: fuzzy_fuzzywuzzy
# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def fuzzy_fuzzywuzzy(fuzz, user_input, collection):
    """Rank ``collection`` entries against ``user_input`` by edit distance.

    Slower than plain matching, but able to cope with characters that
    differ between the query and the candidates.

    Args:
        fuzz: object providing ``token_set_ratio``, used as the scorer.
        user_input: the query string.
        collection: iterable of hashable candidate strings.

    Returns:
        ``None`` when no candidate shares a character with ``user_input``;
        otherwise the result of ``process.extract`` (limit 20) over
        ``(candidate, shared_char_count)`` pairs.
    """
    # Keep each candidate that contains at least one character of the query.
    # A single ordered pass with a `seen` set avoids appending a candidate
    # once per matching character and keeps the input order, so ties in the
    # sort below (and the 500-item cut) no longer depend on set hash order.
    seen = set()
    candidates = []
    for coll in collection:
        if coll not in seen and any(char in coll for char in user_input):
            seen.add(coll)
            candidates.append(coll)
    if not candidates:
        return None

    # Prefer the candidates that share the most characters with the query.
    same_char_list = [
        (candidate, count_same_char(user_input, candidate))
        for candidate in candidates
    ]
    same_char_list.sort(key=lambda pair: pair[1], reverse=True)
    # Cap the work handed to the (slow) fuzzy scorer.
    same_char_list = same_char_list[:500]
    return process.extract(user_input, same_char_list,
                           scorer=fuzz.token_set_ratio, limit=20)
示例5: _genres_from_slots
# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def _genres_from_slots(self, slots: Iterable[str], genres: Iterable[str]):
def genres_from(g):
if not g:
return set()
res = process.extract(g, genres)[:MAX_GUESSES_PER_SLOT]
print_d("Raw genre results: {data}", data=res)
for g, c in res:
# Exact(ish) matches shouldn't allow other genres
if c > MinConfidences.SINGLE_GENRE:
return {g}
return {g for g, c in res
if g and int(c) >= MinConfidences.MULTI_GENRE}
# Grr where's my foldl
results = set()
for slot in slots:
results |= genres_from(slot)
return results
示例6: fuzzy_match_word
# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def fuzzy_match_word(word, words, limit):
    """
    Fuzzy find the indexes of word in words, returns a list of indexes which match the
    best return from fuzzy.
    limit controls the number of choices to allow.

    Indexes of exact matches are always included; indexes of words whose
    fuzzy score is above 70 are added to them.
    """
    # Try finding exact matches
    exact_matches = {i for (i, w) in enumerate(words) if w == word}
    if exact_matches:
        logging.debug("Found exact match for {}".format(word))
    else:
        # Only report a miss when there really was none.
        logging.debug("No exact match for: {}".format(word))

    # Fuzzy matching runs in both cases, because its hits are merged with
    # the exact ones below.
    # Allow some variance which extractOne misses
    # For example: "Armstrong World Industries Inc" in "Armstrong World Industries Inc. agreed in principle to sell its carpet operations to Shaw Industries Inc ."
    best_matches = [
        w
        for (w, s) in process.extract(word, words, processor=semi_process, limit=limit)
        if s > 70
    ]
    logging.debug("Best matches = {}".format(best_matches))
    fuzzy_indexes = (i for (i, w) in enumerate(words) if w in best_matches)
    return list(exact_matches.union(fuzzy_indexes))
# Flatten a list of lists
示例7: find_index_tag
# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def find_index_tag(tag, tlist):
    """
    Returns a list with first element as tag and rest indexes of todos with that tag

    The leading tag is the top ``process.extract`` result of the first todo
    that yields any result; an index is added for every todo whose top
    result scores above 70.
    """
    found = []
    for position, todo in enumerate(tlist):
        candidates = process.extract(tag, todo.tags)
        if not candidates:
            continue
        top = candidates[0]
        if not found:
            # The first usable result provides the tag name at the list head.
            found.append(top[0])
        if top[1] > 70:
            found.append(position)
    return found
示例8: extract
# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def extract(self, context):
    """Return the ``Tag`` of the keyword that best matches ``context["query"]``.

    Keywords are scored with ``process.extract``; those scoring below
    ``self.threshold`` are dropped. Among the rest, the one with the highest
    ``score + len(keyword)/10`` wins, so longer keywords are favoured on
    near-ties.

    Returns:
        ``Tag(TAGMAP[keyword])`` for the winning keyword, or ``None`` when
        no keyword reaches the threshold.
    """
    entities = process.extract(context["query"], self.keywords)
    # NOTE(review): looks like leftover debug output; kept because removing
    # it would change what the program prints.
    print(entities)
    qualified = [entity for entity in entities if entity[1] >= self.threshold]
    if not qualified:
        return None
    # max() returns the first maximal element, the same one a stable
    # descending sort would put first. Only the winner is turned into a Tag,
    # instead of building (and discarding) a Tag for every candidate.
    best = max(qualified, key=lambda x: x[1] + len(x[0])/10)
    return Tag(TAGMAP[best[0]])
示例9: transform
# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def transform(self, context):
    """Delegate to ``self.extract`` and return its result unchanged."""
    result = self.extract(context)
    return result
示例10: search
# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def search(self, query):
    """Return up to five entries of ``self.issues`` fuzzily matching ``query``.

    Candidates are compared by their stripped, lower-cased text; for an
    ``Issue`` that text is its ``title``. Scoring uses ``fuzz.partial_ratio``.
    """
    def normalise(candidate):
        text = candidate.title if isinstance(candidate, Issue) else candidate
        return text.strip().lower()

    # self.issues must not be updated while it is being searched, so the
    # lock is held for the whole lookup.
    with self.issues_lock:
        ranked = process.extract(
            query,
            self.issues,
            scorer=fuzz.partial_ratio,
            processor=normalise,
            limit=5,
        )
        # The score is of no interest here: keep the first element only.
        return [entry[0] for entry in ranked]
示例11: search_station
# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def search_station(self, query):
    """
    Search ``self.stations`` by name or code.

    Returns the matched entries as produced by ``process.extract``, with the
    scores dropped.
    """
    hits = process.extract(query, self.stations)
    return [hit[0] for hit in hits]
示例12: search_train
# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def search_train(self, query):
    """
    Search the values of ``self.trains`` by name or number.

    Returns the matched entries as produced by ``process.extract``, with the
    scores dropped.
    """
    candidates = self.trains.values()
    hits = process.extract(query, candidates)
    return [hit[0] for hit in hits]
示例13: search_term
# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def search_term(self, txt, pw_count):
    """Sum the counts of all passwords that fuzzily match the given term(s).

    Args:
        txt: a single term (``str``) or an iterable of terms.
        pw_count: mapping of password -> mapping holding a ``"count"`` entry.

    Returns:
        The total of ``pw_count[p]["count"]`` over every password ``p`` that
        scores above 75 against a term; a password matching several terms is
        counted once per term. ``0`` when there are no terms.
    """
    # isinstance (rather than an exact type comparison) also wraps str
    # subclasses, which would otherwise be iterated character by character.
    terms = [txt] if isinstance(txt, str) else txt
    pws = pw_count.keys()
    total_matches = 0
    for term in terms:
        # limit=None: consider every password, not just the top few.
        matches = process.extract(term, pws, limit=None)
        total_matches += sum(
            pw_count[match[0]]["count"] for match in matches if match[1] > 75
        )
    return total_matches
示例14: search_passwords_fuzzy
# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def search_passwords_fuzzy(query):
    """Fuzzy-search the entries returned by ``list_passwords()`` with fuzzywuzzy.

    Returns the matched entries as produced by ``process.extract``, with the
    scores dropped.
    """
    candidates = list_passwords()
    hits = process.extract(query, candidates)
    return [hit[0] for hit in hits]
示例15: fuzzy
# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def fuzzy(tokens):
    """Return the mean of the per-token mean similarity scores in ``tokens``.

    Every token is matched against ``tokens`` with ``process.extract`` and
    ``fuzz.partial_ratio``. The first result scoring exactly 100 is skipped
    (presumably the token matching itself); the remaining scores are
    averaged per token, and those averages are averaged again.

    ``statistics.mean`` raises ``StatisticsError`` when a token has no
    remaining scores or when ``tokens`` is empty.
    """
    per_token_means = []
    for token in tokens:
        matches = process.extract(token, tokens, scorer=fuzz.partial_ratio)
        other_scores = []
        self_match_pending = True
        for match in matches:
            if self_match_pending and match[1] == 100:
                # Drop a single perfect score, once per token.
                self_match_pending = False
            else:
                other_scores.append(match[1])
        per_token_means.append(statistics.mean(other_scores))
    return statistics.mean(per_token_means)