Python process.extract方法代码示例

本文整理汇总了Python中fuzzywuzzy.process.extract方法的典型用法代码示例。如果您正苦于以下问题：Python process.extract方法的具体用法？Python process.extract怎么用？Python process.extract使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块fuzzywuzzy.process的用法示例。


示例1: search

# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def search(self, query):
        # Fetch the anime list from the twist.moe API and fuzzy-match `query`
        # against it with fuzzywuzzy's process.extract.
        #
        # NOTE(review): this snippet is truncated in this listing. The `headers`
        # dict literal is never closed and the body of the `for` loop is only a
        # fragment of a larger expression, so the block does not parse as shown.
        # Restore the missing lines from the upstream project before using it.
        headers = {
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.46 Safari/537.36',
        # NOTE(review): access token is hard-coded in source; consider moving
        # it to configuration.
        'x-access-token': '1rj2vRtegS8Y60B3w3qNZm5T2Q0TN2NR'
        # soup = helpers.soupify(helpers.get('https://twist.moe/', allow_redirects=True, headers=headers))
        req = helpers.get('https://twist.moe/api/anime', headers=headers)
        # When the response body contains the redirect notice, obtain a cookie
        # via get_cookie() and repeat the request with it attached.
        if 'being redirected' in req.text:
            logger.debug('Tring to extract cookie')
            cookie = get_cookie(req)
            logger.debug('Got cookie: ' + cookie)
            headers['cookie'] = cookie
            # XXX: Can't use helpers.get here because that one is cached. Investigate
            req = helpers.get('https://twist.moe/api/anime', headers=headers)
        all_anime = req.json()
        animes = []
        for anime in all_anime:
                # NOTE(review): presumably a keyword argument of a call that
                # built one search result per entry and appended it to
                # `animes`; the enclosing call is missing here -- TODO restore.
                url='https://twist.moe/a/' + anime['slug']['slug'] + '/',
        # Keep only the first element of each result returned by
        # process.extract (the matched choice), dropping the rest.
        animes = [ani[0] for ani in process.extract(query, animes)]
        return animes 

示例2: run

# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def run(self):
        # NOTE(review): this snippet is truncated in this listing. The `try:`
        # that pairs with the `except` below, the bodies of both `for` loops
        # and the body of the `except` handler are missing, so the block does
        # not parse as shown. Restore them from the upstream project.
        #
        # Bind this object's terminate_event into the scorer so every scoring
        # call made by process.extract receives it as a keyword argument.
        f = functools.partial(hooked_scorer, terminate_event=self.terminate_event)
            # Ask for up to LISTLEN matches of self.s among `names`.
            res = process.extract(self.s, names, limit=LISTLEN, scorer=f)  # f.iStr1.value won't change until Form.Execute() returns.
            extracts = []
            # NOTE(review): loop body missing -- presumably collects entries of
            # `res` into `extracts`; TODO confirm.
            for i in res:
            # NOTE(review): loop body missing -- runs (10 - len(res)) times,
            # presumably to pad `extracts`; TODO confirm.
            for i in range(10-len(res)):
            self.refresh_list.emit(*extracts)  # call main Thread's UI function.
        # NOTE(review): handler body missing in this listing.
        except TerminateException:

# -------------------------------------------------------------------------- 

示例3: _get_page

# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def _get_page(self, topic, request_options=None):
        """Build an "unknown topic" message suggesting the closest known topics.

        Args:
            topic: The requested topic name. A name starting with ``:`` is
                compared only against other ``:``-prefixed topics; any other
                name is compared only against topics without that prefix.
            request_options: Not used by this method.

        Returns:
            A multi-line string listing up to three ``topic score`` suggestions
            ranked with ``fuzz.ratio``.
        """
        wants_special = topic.startswith(':')
        # Compare like with like: special (':'-prefixed) topics and regular
        # topics are never suggested for each other.
        topics_list = [
            x for x in self.get_topics_list()
            if x.startswith(':') == wants_special
        ]

        possible_topics = process.extract(topic, topics_list, limit=3, scorer=fuzz.ratio)
        possible_topics_text = "\n".join("    * %s %s" % x for x in possible_topics)
        # The template needs a %s placeholder; without it the % operator
        # raises "not all arguments converted during string formatting".
        return """
Unknown topic.
Do you mean one of these topics maybe?

%s
    """ % possible_topics_text

示例4: fuzzy_fuzzywuzzy

# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def fuzzy_fuzzywuzzy(fuzz, user_input, collection):
    """Rank entries of ``collection`` against ``user_input`` with fuzzywuzzy.

    Candidates are narrowed in two steps before the (slow) fuzzy scoring:
    first to entries sharing at least one character with ``user_input``, then
    to the 500 entries with the highest ``count_same_char`` value.

    Args:
        fuzz: Object providing the ``token_set_ratio`` scorer.
        user_input: The query string.
        collection: Iterable of hashable candidates supporting ``in`` tests.

    Returns:
        The result of ``process.extract`` limited to 20 entries, or ``None``
        when no candidate shares a character with ``user_input``.
    """
    # Keep candidates containing at least one character of the query; using a
    # set also removes duplicates.
    collection_new = {
        coll for coll in collection
        if any(char in coll for char in user_input)
    }
    if not collection_new:
        return None

    # Prefer the candidates sharing the most characters with the query and cap
    # the list so the fuzzy scoring below stays affordable.
    same_char_list = sorted(
        ((coll, count_same_char(user_input, coll)) for coll in collection_new),
        key=lambda pair: pair[1],
        reverse=True,
    )[:500]

    # NOTE(review): the choices handed to process.extract are
    # (candidate, shared_char_count) tuples rather than plain strings --
    # confirm the scorer/processor in use handles them as intended.
    return process.extract(user_input, same_char_list, scorer=fuzz.token_set_ratio, limit=20)

示例5: _genres_from_slots

# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def _genres_from_slots(self, slots: Iterable[str], genres: Iterable[str]):
        def genres_from(g):
            if not g:
                return set()
            res = process.extract(g, genres)[:MAX_GUESSES_PER_SLOT]
            print_d("Raw genre results: {data}", data=res)
            for g, c in res:
                # Exact(ish) matches shouldn't allow other genres
                if c > MinConfidences.SINGLE_GENRE:
                    return {g}
            return {g for g, c in res
                    if g and int(c) >= MinConfidences.MULTI_GENRE}

        # Grr where's my foldl
        results = set()
        for slot in slots:
            results |= genres_from(slot)
        return results 

示例6: fuzzy_match_word

# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def fuzzy_match_word(word, words, limit):
    """
    Fuzzy find the indexes of word in words, returns a list of indexes which match the
    best return from fuzzy.
    limit controls the number of choices to allow.
    """
    # Try finding exact matches
    exact_matches = {i for i, w in enumerate(words) if w == word}
    if exact_matches:
        logging.debug("Found exact match for %s", word)
    else:
        logging.debug("No exact match for: %s", word)

    # NOTE(review): fuzzy matching runs even when exact matches exist and the
    # results are merged below; the wording of the log messages suggests an
    # early return may once have been intended -- confirm with upstream.
    # Allow some variance which extractOne misses
    # For example: "Armstrong World Industries Inc" in "Armstrong World Industries Inc. agreed in principle to sell its carpet operations to Shaw Industries Inc ."
    best_matches = [
        w
        for (w, s) in process.extract(word, words, processor=semi_process, limit=limit)
        if s > 70
    ]
    logging.debug("Best matches = %s", best_matches)
    return list(exact_matches.union(i for (i, w) in enumerate(words) if w in best_matches))

# Flatten a list of lists 

示例7: find_index_tag

# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def find_index_tag(tag, tlist):
    """
    Returns a list with first element as tag and rest indexes of todos with that tag
    """
    index_list = []
    for i, todo in enumerate(tlist):
        similar_tags = process.extract(tag, todo.tags)
        if not similar_tags:
            continue
        best_tag, best_score = similar_tags[0][0], similar_tags[0][1]
        # NOTE(review): the bodies of the two checks below were missing from
        # this listing and are reconstructed from the docstring contract --
        # confirm against the upstream project.
        if not index_list:
            # First element of the result is the tag itself.
            index_list.append(best_tag)
        if best_score > 70:
            # Only todos whose closest tag scores above 70 count as a match.
            index_list.append(i)
    return index_list

示例8: extract

# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def extract(self, context):
        """Return the best keyword match for ``context["query"]`` as a ``Tag``.

        Matches scoring below ``self.threshold`` are discarded. The rest are
        ranked by score plus a tenth of the keyword's length, highest first.
        Returns ``None`` when nothing passes the threshold.
        """
        hits = [
            hit
            for hit in process.extract(context["query"], self.keywords)
            if hit[1] >= self.threshold
        ]
        # Longer keywords get a small boost so they win ties between scores.
        hits.sort(key=lambda hit: hit[1] + len(hit[0])/10, reverse=True)
        tags = [Tag(TAGMAP[hit[0]]) for hit in hits]

        return tags[0] if tags else None

示例9: transform

# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def transform(self, context):
        """Delegate to ``self.extract`` and return its result unchanged."""
        extracted = self.extract(context)
        return extracted

示例10: search

# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def search(self, query):
        """Return up to five entries of ``self.issues`` best matching ``query``.

        Candidates are compared with ``fuzz.partial_ratio`` after being
        stripped and lower-cased; ``Issue`` objects are compared by title.
        Scores are dropped from the result.
        """
        def normalise(candidate):
            text = candidate.title if isinstance(candidate, Issue) else candidate
            return text.strip().lower()

        # self.issues must not be updated while it is being searched, so hold
        # the lock for the whole extraction.
        with self.issues_lock:
            ranked = process.extract(
                query,
                self.issues,
                scorer=fuzz.partial_ratio,
                processor=normalise,
                limit=5,
            )
            return [match[0] for match in ranked]

示例11: search_station

# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def search_station(self, query):
        """Search station by name or code.

        Returns the first element (the matched choice) of each result that
        ``process.extract`` yields for ``query`` against ``self.stations``.
        """
        return [match[0] for match in process.extract(query, self.stations)]

示例12: search_train

# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def search_train(self, query):
        """Search train by name or number.

        Returns the first element (the matched choice) of each result that
        ``process.extract`` yields for ``query`` against the values of
        ``self.trains``.
        """
        return [match[0] for match in process.extract(query, self.trains.values())]

示例13: search_term

# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def search_term(self, txt, pw_count):
        """Sum the counts of all passwords that fuzzily match the given term(s).

        Args:
            txt: A single term (``str``) or an iterable of terms.
            pw_count: Mapping of password to a record holding a ``"count"``
                entry.

        Returns:
            The sum of ``pw_count[p]["count"]`` over every password ``p``
            scoring above 75 against a term. A password matching several
            terms is counted once per term.
        """
        pws = pw_count.keys()
        # isinstance also covers str subclasses, which would otherwise be
        # iterated character by character below.
        if isinstance(txt, str):
            txt = [txt]
        total_matches = 0
        for t in txt:
            # limit=None: consider every password, not just the top few.
            total_matches += sum(
                pw_count[r[0]]["count"]
                for r in process.extract(t, pws, limit=None)
                if r[1] > 75
            )

        return total_matches

示例14: search_passwords_fuzzy

# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def search_passwords_fuzzy(query):
    """Fuzzy-search the entries from ``list_passwords()`` using fuzzywuzzy.

    Returns the matched entries in the order ``process.extract`` reports
    them, without their scores.
    """
    candidates = list_passwords()
    ranked = process.extract(query, candidates)
    return [choice for choice, *_rest in ranked]

示例15: fuzzy

# 需要导入模块: from fuzzywuzzy import process [as 别名]
# 或者: from fuzzywuzzy.process import extract [as 别名]
def fuzzy(tokens):
    """Return the mean fuzzy similarity between the given tokens.

    For every token, ``process.extract`` (scored with ``fuzz.partial_ratio``)
    is run against the full token list. One perfect (100) hit per token is
    treated as the token matching itself and dropped; the remaining scores
    are averaged. The result is the mean of those per-token averages.

    Args:
        tokens: A re-iterable collection of strings.

    Returns:
        The mean of the per-token averages, or ``0.0`` when there is nothing
        to compare (no tokens, or no token has any match besides itself).
    """
    averages = []
    for token in tokens:
        same_token_removed = False
        scores = []
        for each in process.extract(token, tokens, scorer=fuzz.partial_ratio):
            score = each[1]
            # Skip exactly one perfect hit: the token compared with itself.
            if score == 100 and not same_token_removed:
                same_token_removed = True
                continue
            scores.append(score)
        # statistics.mean raises on empty data, so skip tokens that only
        # matched themselves.
        if scores:
            averages.append(statistics.mean(scores))
    return statistics.mean(averages) if averages else 0.0
