This page collects typical usage examples of the Python spacy.matcher module. If you have been wondering how to use spacy.matcher, what it is good for, or what real code built on it looks like, the curated examples below may help. You can also explore further usage examples from the parent module, spacy.
The following shows 9 code examples of spacy.matcher, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
Example 1: extract_name
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def extract_name(nlp_text, matcher):
    '''
    Helper function to extract a name from spaCy-parsed text
    :param nlp_text: object of `spacy.tokens.doc.Doc`
    :param matcher: object of `spacy.matcher.Matcher`
    :return: string of full name
    '''
    pattern = [cs.NAME_PATTERN]
    matcher.add('NAME', None, *pattern)  # spaCy 2.x signature: add(name, on_match, *patterns)
    matches = matcher(nlp_text)
    for match_id, start, end in matches:
        span = nlp_text[start:end]
        return span.text  # return only the first match
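A minimal sketch of the setup this helper expects, assuming spaCy 2.x and a stand-in for `cs.NAME_PATTERN` (the real constant lives in the caller's constants module; the two-proper-noun pattern below is only illustrative):

import spacy
from spacy.matcher import Matcher

nlp = spacy.load('en_core_web_sm')
# Hypothetical stand-in for cs.NAME_PATTERN: two consecutive proper nouns.
NAME_PATTERN = [{'POS': 'PROPN'}, {'POS': 'PROPN'}]

matcher = Matcher(nlp.vocab)
matcher.add('NAME', None, NAME_PATTERN)  # spaCy 2.x add() signature
doc = nlp('John Smith is a software engineer.')
for match_id, start, end in matcher(doc):
    print(doc[start:end].text)  # -> John Smith
    break  # extract_name returns only the first match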
Example 2: __get_basic_details
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def __get_basic_details(self):
    name = utils.extract_name(self.__nlp, matcher=self.__matcher)
    email = utils.extract_email(self.__text)
    mobile = utils.extract_mobile_number(self.__text)
    skills = utils.extract_skills(self.__nlp, self.__noun_chunks)
    edu = utils.extract_education([sent.string.strip() for sent in self.__nlp.sents])
    experience = utils.extract_experience(self.__text)
    entities = utils.extract_entity_sections(self.__text_raw)
    self.__details['name'] = name
    self.__details['email'] = email
    self.__details['mobile_number'] = mobile
    self.__details['skills'] = skills
    # self.__details['education'] = entities['education']
    self.__details['education'] = edu
    self.__details['experience'] = experience
    try:
        self.__details['competencies'] = utils.extract_competencies(self.__text_raw, entities['experience'])
        self.__details['measurable_results'] = utils.extract_measurable_results(self.__text_raw, entities['experience'])
    except KeyError:
        # No 'experience' section was found in the resume.
        self.__details['competencies'] = []
        self.__details['measurable_results'] = []
    return
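The `utils` helpers referenced above are not shown on this page. As a rough, hedged sketch, email and phone extraction in resume parsers of this style are usually thin regex wrappers; the patterns below are illustrative, not the project's actual ones:

import re

def extract_email(text):
    # First email-like substring, if any.
    match = re.search(r'[\w.+-]+@[\w-]+\.[\w.-]+', text)
    return match.group(0) if match else None

def extract_mobile_number(text):
    # Loose phone pattern with optional country code; real parsers are stricter.
    match = re.search(r'(?:\+\d{1,3}[\s-]?)?(?:\d[\s-]?){9,11}', text)
    return match.group(0) if match else None

print(extract_email('Reach me at jane.doe@example.com'))  # -> jane.doe@example.com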
Example 3: match_mentioned_concepts
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def match_mentioned_concepts(nlp, sents, answers, batch_id=-1):
    matcher = load_matcher(nlp)
    res = []
    # print("Begin matching concepts.")
    for sid, s in tqdm(enumerate(sents), total=len(sents), desc="grounding batch_id:%d" % batch_id):
        a = answers[sid]
        all_concepts = ground_mentioned_concepts(nlp, matcher, s, a)
        answer_concepts = ground_mentioned_concepts(nlp, matcher, a)
        question_concepts = all_concepts - answer_concepts
        if len(question_concepts) == 0:
            # print(s)
            question_concepts = hard_ground(nlp, s)  # unlikely fallback
        if len(answer_concepts) == 0:
            print(a)
            answer_concepts = hard_ground(nlp, a)  # happens in some cases
            print(answer_concepts)
        res.append({"sent": s, "ans": a, "qc": list(question_concepts), "ac": list(answer_concepts)})
    return res
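Assuming `load_matcher` (Example 7) and its pattern file are in place, a call site might look like this (the sentence and answer below are illustrative):

import spacy

nlp = spacy.load('en_core_web_sm', disable=['ner', 'parser'])
sents = ['A revolving door is convenient for two direction travel.']
answers = ['bank']
grounded = match_mentioned_concepts(nlp, sents, answers, batch_id=0)
# -> [{'sent': ..., 'ans': 'bank', 'qc': [...], 'ac': [...]}]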
Example 4: __init__
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def __init__(self, spacy_model: str = 'en_core_web_sm', disable: Optional[Iterable[str]] = None, **kwargs):
    if disable is None:
        disable = ['parser', 'ner']
    self.model = _try_load_spacy_model(spacy_model, disable=disable)
    # Register custom boolean token flags for "below"/"above" style trigger words.
    below = lambda text: bool(re.compile(r'below|cheap').match(text))
    BELOW = self.model.vocab.add_flag(below)
    above = lambda text: bool(re.compile(r'above|start').match(text))
    ABOVE = self.model.vocab.add_flag(above)
    self.matcher = Matcher(self.model.vocab)
    self.matcher.add('below', None, [{BELOW: True}, {'LOWER': 'than', 'OP': '?'},
                                     {'LOWER': 'from', 'OP': '?'}, {'ORTH': '$', 'OP': '?'},
                                     {'ENT_TYPE': 'MONEY', 'LIKE_NUM': True}])
    self.matcher.add('above', None, [{ABOVE: True}, {'LOWER': 'than', 'OP': '?'},
                                     {'LOWER': 'from', 'OP': '?'}, {'ORTH': '$', 'OP': '?'},
                                     {'ENT_TYPE': 'MONEY', 'LIKE_NUM': True}])
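The `vocab.add_flag` call registers a custom boolean token attribute whose integer id can then be used as a pattern key. A compact, self-contained demo of the same idea (spaCy 2.x API; model name assumed):

import re
import spacy
from spacy.matcher import Matcher

nlp = spacy.load('en_core_web_sm')
# The flag is True for any token whose text matches the regex.
BELOW = nlp.vocab.add_flag(lambda text: bool(re.match(r'below|cheap', text)))

matcher = Matcher(nlp.vocab)
matcher.add('below', None, [{BELOW: True}, {'LOWER': 'than', 'OP': '?'}, {'LIKE_NUM': True}])
doc = nlp('show me laptops below 500')
for match_id, start, end in matcher(doc):
    print(doc[start:end].text)  # -> below 500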
Example 5: match
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def match(self, matcher):
    """
    Run a spaCy matcher over the cleaned content

    >>> import spacy.matcher
    >>> from textpipe.doc import Doc
    >>> matcher = spacy.matcher.Matcher(spacy.lang.en.English().vocab)
    >>> matcher.add('HASHTAG', None, [{'ORTH': '#'}, {'IS_ASCII': True}])
    >>> Doc('Test with #hashtag').match(matcher)
    [('#hashtag', 'HASHTAG')]
    """
    return [(self._spacy_doc[start:end].text, matcher.vocab.strings[match_id])
            for match_id, start, end in matcher(self._spacy_doc)]
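The list comprehension pairs each matched span with its rule name via `matcher.vocab.strings`; the same projection works on any spaCy `Doc` directly, without textpipe (spaCy 2.x API):

import spacy.lang.en
import spacy.matcher

nlp = spacy.lang.en.English()
matcher = spacy.matcher.Matcher(nlp.vocab)
matcher.add('HASHTAG', None, [{'ORTH': '#'}, {'IS_ASCII': True}])
doc = nlp('Test with #hashtag')
print([(doc[start:end].text, matcher.vocab.strings[match_id])
       for match_id, start, end in matcher(doc)])  # [('#hashtag', 'HASHTAG')]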
Example 6: judgemental
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def judgemental(question):
    matcher = PhraseMatcher(nlp.vocab)
    # PhraseMatcher is case-sensitive by default, hence the capitalised variants.
    matcher.add('JUDGEMENT', None, nlp('your'), nlp('you'), nlp('Your'), nlp('You'))
    return len(matcher(question)) > 0
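With the implicit imports filled in, the snippet runs like this, assuming the function is defined in the same module as a loaded `nlp` pipeline (model name assumed):

import spacy
from spacy.matcher import PhraseMatcher

nlp = spacy.load('en_core_web_sm')
print(judgemental(nlp('Do you think this is fair?')))  # -> True
print(judgemental(nlp('Is the sky blue?')))            # -> False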
Example 7: load_matcher
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def load_matcher(nlp):
    config = configparser.ConfigParser()
    config.read("paths.cfg")
    with open(config["paths"]["matcher_patterns"], "r", encoding="utf8") as f:
        all_patterns = json.load(f)
    matcher = Matcher(nlp.vocab)
    for concept, pattern in tqdm(all_patterns.items(), desc="Adding patterns to Matcher."):
        matcher.add(concept, None, pattern)
    return matcher
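For this loader to run, `paths.cfg` needs a `[paths]` section pointing at a JSON file that maps concept names to spaCy token patterns. A hedged sketch of fixtures that would satisfy it (file names and the pattern itself are illustrative):

import json

with open('paths.cfg', 'w') as f:
    f.write('[paths]\nmatcher_patterns = matcher_patterns.json\n')

# One concept -> one token pattern, keyed the way Example 9 expects
# (underscores separate the words of multi-token concepts).
patterns = {'revolving_door': [{'LEMMA': 'revolving'}, {'LEMMA': 'door'}]}
with open('matcher_patterns.json', 'w', encoding='utf8') as f:
    json.dump(patterns, f)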
Example 8: extract_money
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def extract_money(self, doc: spacy.tokens.Doc) -> Tuple[List, Tuple[float, float]]:
    """Extract money entities and money-related tokens from `doc`.

    Parameters:
        doc: a list of tokens with corresponding tags, lemmas, etc.

    Returns:
        doc_no_money: doc with all money-related tokens removed.
        money_range: money range from `money_range[0]` to `money_range[1]` extracted from the doc.
    """
    matches = self.matcher(doc)
    money_range: Tuple = ()
    doc_no_money = list(doc)
    negated = False
    for match_id, start, end in matches:
        string_id = self.model.vocab.strings[match_id]
        span = doc[start:end]
        # Check whether the matched trigger word is negated ("not below ...").
        for child in doc[start].children:
            if child.dep_ == 'neg':
                negated = True
        num_token = [token for token in span if token.like_num]
        if (string_id == 'below' and not negated) or (string_id == 'above' and negated):
            money_range = (0, float(num_token[0].text))
        if (string_id == 'above' and not negated) or (string_id == 'below' and negated):
            money_range = (float(num_token[0].text), float(math.inf))
        del doc_no_money[start:end + 1]
    return doc_no_money, money_range
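Assuming this method shares a class with Example 4's `__init__` (the name `MoneyExtractor` below is hypothetical), a call might look like the sketch that follows. Note that the patterns key on `ENT_TYPE: 'MONEY'` and the negation check reads `token.dep_`, so in practice the pipeline needs NER and the parser enabled rather than Example 4's default `disable=['parser', 'ner']`:

ext = MoneyExtractor('en_core_web_sm', disable=[])  # hypothetical class, full pipeline
doc = ext.model('show me phones below $300')
doc_no_money, money_range = ext.extract_money(doc)
# money_range: e.g. (0, 300.0) when the 'below' rule fires,
# or the empty tuple () if no money pattern matched.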
Example 9: ground_mentioned_concepts
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def ground_mentioned_concepts(nlp, matcher, s, ans=""):
    s = s.lower()
    doc = nlp(s)
    matches = matcher(doc)
    mentioned_concepts = set()
    span_to_concepts = {}
    for match_id, start, end in matches:
        span = doc[start:end].text  # the matched span
        # Skip spans that share words with the answer.
        if len(set(span.split(" ")).intersection(set(ans.split(" ")))) > 0:
            continue
        original_concept = nlp.vocab.strings[match_id]
        # print("Matched '" + span + "' to the rule '" + original_concept)
        if len(original_concept.split("_")) == 1:
            original_concept = list(lemmatize(nlp, original_concept))[0]
        if span not in span_to_concepts:
            span_to_concepts[span] = set()
        span_to_concepts[span].add(original_concept)
    for span, concepts in span_to_concepts.items():
        concepts_sorted = list(concepts)
        concepts_sorted.sort(key=len)
        # mentioned_concepts.update(concepts_sorted[0:2])
        shortest = concepts_sorted[0:3]
        for c in shortest:
            if c in blacklist:
                continue
            # Prefer a lemmatized form when it coincides with another short candidate.
            lcs = lemmatize(nlp, c)
            intersect = lcs.intersection(shortest)
            if len(intersect) > 0:
                mentioned_concepts.add(list(intersect)[0])
            else:
                mentioned_concepts.add(c)
    return mentioned_concepts
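End to end, Examples 3, 7, and 9 compose roughly as follows (`lemmatize`, `hard_ground`, and `blacklist` are module-level helpers not reproduced on this page; the concept names returned depend entirely on the pattern file):

import spacy

nlp = spacy.load('en_core_web_sm', disable=['ner', 'parser'])
matcher = load_matcher(nlp)  # Example 7
concepts = ground_mentioned_concepts(
    nlp, matcher, 'A revolving door is convenient for two direction travel.')
print(concepts)  # e.g. {'revolving_door', ...}, depending on the pattern file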