This page collects typical usage examples of the Python spacy.matcher module. If you have been wondering how to use spacy.matcher, what it is good for, or what real code built on it looks like, the curated examples below may help. You can also explore further usage examples from the parent module, spacy.
The following shows 9 code examples of spacy.matcher, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
Example 1: extract_name
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def extract_name(nlp_text, matcher):
    '''
    Helper function to extract a name from spaCy-parsed text
    :param nlp_text: object of `spacy.tokens.doc.Doc`
    :param matcher: object of `spacy.matcher.Matcher`
    :return: string of full name
    '''
    pattern = [cs.NAME_PATTERN]
    matcher.add('NAME', None, *pattern)  # spaCy 2.x signature: add(name, on_match, *patterns)
    matches = matcher(nlp_text)
    for match_id, start, end in matches:
        span = nlp_text[start:end]
        return span.text  # return only the first match
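A minimal sketch of the setup this helper expects, assuming spaCy 2.x and a stand-in for `cs.NAME_PATTERN` (the real constant lives in the caller's constants module; the two-proper-noun pattern below is only illustrative):

import spacy
from spacy.matcher import Matcher

nlp = spacy.load('en_core_web_sm')
# Hypothetical stand-in for cs.NAME_PATTERN: two consecutive proper nouns.
NAME_PATTERN = [{'POS': 'PROPN'}, {'POS': 'PROPN'}]

matcher = Matcher(nlp.vocab)
matcher.add('NAME', None, NAME_PATTERN)  # spaCy 2.x add() signature
doc = nlp('John Smith is a software engineer.')
for match_id, start, end in matcher(doc):
    print(doc[start:end].text)  # -> John Smith
    break  # extract_name returns only the first match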
Example 2: __get_basic_details
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def __get_basic_details(self):
    name = utils.extract_name(self.__nlp, matcher=self.__matcher)
    email = utils.extract_email(self.__text)
    mobile = utils.extract_mobile_number(self.__text)
    skills = utils.extract_skills(self.__nlp, self.__noun_chunks)
    edu = utils.extract_education([sent.string.strip() for sent in self.__nlp.sents])
    experience = utils.extract_experience(self.__text)
    entities = utils.extract_entity_sections(self.__text_raw)
    self.__details['name'] = name
    self.__details['email'] = email
    self.__details['mobile_number'] = mobile
    self.__details['skills'] = skills
    # self.__details['education'] = entities['education']
    self.__details['education'] = edu
    self.__details['experience'] = experience
    try:
        self.__details['competencies'] = utils.extract_competencies(self.__text_raw, entities['experience'])
        self.__details['measurable_results'] = utils.extract_measurable_results(self.__text_raw, entities['experience'])
    except KeyError:
        # No 'experience' section was found in the resume.
        self.__details['competencies'] = []
        self.__details['measurable_results'] = []
    return
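The `utils` helpers referenced above are not shown on this page. As a rough, hedged sketch, email and phone extraction in resume parsers of this style are usually thin regex wrappers; the patterns below are illustrative, not the project's actual ones:

import re

def extract_email(text):
    # First email-like substring, if any.
    match = re.search(r'[\w.+-]+@[\w-]+\.[\w.-]+', text)
    return match.group(0) if match else None

def extract_mobile_number(text):
    # Loose phone pattern with optional country code; real parsers are stricter.
    match = re.search(r'(?:\+\d{1,3}[\s-]?)?(?:\d[\s-]?){9,11}', text)
    return match.group(0) if match else None

print(extract_email('Reach me at jane.doe@example.com'))  # -> jane.doe@example.com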
Example 3: match_mentioned_concepts
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def match_mentioned_concepts(nlp, sents, answers, batch_id=-1):
    matcher = load_matcher(nlp)
    res = []
    # print("Begin matching concepts.")
    for sid, s in tqdm(enumerate(sents), total=len(sents), desc="grounding batch_id:%d" % batch_id):
        a = answers[sid]
        all_concepts = ground_mentioned_concepts(nlp, matcher, s, a)
        answer_concepts = ground_mentioned_concepts(nlp, matcher, a)
        question_concepts = all_concepts - answer_concepts
        if len(question_concepts) == 0:
            # print(s)
            question_concepts = hard_ground(nlp, s)  # unlikely fallback
        if len(answer_concepts) == 0:
            print(a)
            answer_concepts = hard_ground(nlp, a)  # happens in some cases
            print(answer_concepts)
        res.append({"sent": s, "ans": a, "qc": list(question_concepts), "ac": list(answer_concepts)})
    return res
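Assuming `load_matcher` (Example 7) and its pattern file are in place, a call site might look like this (the sentence and answer below are illustrative):

import spacy

nlp = spacy.load('en_core_web_sm', disable=['ner', 'parser'])
sents = ['A revolving door is convenient for two direction travel.']
answers = ['bank']
grounded = match_mentioned_concepts(nlp, sents, answers, batch_id=0)
# -> [{'sent': ..., 'ans': 'bank', 'qc': [...], 'ac': [...]}]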
Example 4: __init__
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def __init__(self, spacy_model: str = 'en_core_web_sm', disable: Optional[Iterable[str]] = None, **kwargs):
    if disable is None:
        disable = ['parser', 'ner']
    self.model = _try_load_spacy_model(spacy_model, disable=disable)
    # Register custom boolean token flags for "below"/"above" style trigger words.
    below = lambda text: bool(re.compile(r'below|cheap').match(text))
    BELOW = self.model.vocab.add_flag(below)
    above = lambda text: bool(re.compile(r'above|start').match(text))
    ABOVE = self.model.vocab.add_flag(above)
    self.matcher = Matcher(self.model.vocab)
    self.matcher.add('below', None, [{BELOW: True}, {'LOWER': 'than', 'OP': '?'},
                                     {'LOWER': 'from', 'OP': '?'}, {'ORTH': '$', 'OP': '?'},
                                     {'ENT_TYPE': 'MONEY', 'LIKE_NUM': True}])
    self.matcher.add('above', None, [{ABOVE: True}, {'LOWER': 'than', 'OP': '?'},
                                     {'LOWER': 'from', 'OP': '?'}, {'ORTH': '$', 'OP': '?'},
                                     {'ENT_TYPE': 'MONEY', 'LIKE_NUM': True}])
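The `vocab.add_flag` call registers a custom boolean token attribute whose integer id can then be used as a pattern key. A compact, self-contained demo of the same idea (spaCy 2.x API; model name assumed):

import re
import spacy
from spacy.matcher import Matcher

nlp = spacy.load('en_core_web_sm')
# The flag is True for any token whose text matches the regex.
BELOW = nlp.vocab.add_flag(lambda text: bool(re.match(r'below|cheap', text)))

matcher = Matcher(nlp.vocab)
matcher.add('below', None, [{BELOW: True}, {'LOWER': 'than', 'OP': '?'}, {'LIKE_NUM': True}])
doc = nlp('show me laptops below 500')
for match_id, start, end in matcher(doc):
    print(doc[start:end].text)  # -> below 500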
Example 5: match
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def match(self, matcher):
    """
    Run a spaCy matcher over the cleaned content

    >>> import spacy.matcher
    >>> from textpipe.doc import Doc
    >>> matcher = spacy.matcher.Matcher(spacy.lang.en.English().vocab)
    >>> matcher.add('HASHTAG', None, [{'ORTH': '#'}, {'IS_ASCII': True}])
    >>> Doc('Test with #hashtag').match(matcher)
    [('#hashtag', 'HASHTAG')]
    """
    return [(self._spacy_doc[start:end].text, matcher.vocab.strings[match_id])
            for match_id, start, end in matcher(self._spacy_doc)]
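The list comprehension pairs each matched span with its rule name via `matcher.vocab.strings`; the same projection works on any spaCy `Doc` directly, without textpipe (spaCy 2.x API):

import spacy.lang.en
import spacy.matcher

nlp = spacy.lang.en.English()
matcher = spacy.matcher.Matcher(nlp.vocab)
matcher.add('HASHTAG', None, [{'ORTH': '#'}, {'IS_ASCII': True}])
doc = nlp('Test with #hashtag')
print([(doc[start:end].text, matcher.vocab.strings[match_id])
       for match_id, start, end in matcher(doc)])  # [('#hashtag', 'HASHTAG')]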
Example 6: judgemental
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def judgemental(question):
    matcher = PhraseMatcher(nlp.vocab)
    # PhraseMatcher is case-sensitive by default, hence the capitalised variants.
    matcher.add('JUDGEMENT', None, nlp('your'), nlp('you'), nlp('Your'), nlp('You'))
    return len(matcher(question)) > 0
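With the implicit imports filled in, the snippet runs like this, assuming the function is defined in the same module as a loaded `nlp` pipeline (model name assumed):

import spacy
from spacy.matcher import PhraseMatcher

nlp = spacy.load('en_core_web_sm')
print(judgemental(nlp('Do you think this is fair?')))  # -> True
print(judgemental(nlp('Is the sky blue?')))            # -> False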
Example 7: load_matcher
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def load_matcher(nlp):
    config = configparser.ConfigParser()
    config.read("paths.cfg")
    with open(config["paths"]["matcher_patterns"], "r", encoding="utf8") as f:
        all_patterns = json.load(f)
    matcher = Matcher(nlp.vocab)
    for concept, pattern in tqdm(all_patterns.items(), desc="Adding patterns to Matcher."):
        matcher.add(concept, None, pattern)
    return matcher
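For this loader to run, `paths.cfg` needs a `[paths]` section pointing at a JSON file that maps concept names to spaCy token patterns. A hedged sketch of fixtures that would satisfy it (file names and the pattern itself are illustrative):

import json

with open('paths.cfg', 'w') as f:
    f.write('[paths]\nmatcher_patterns = matcher_patterns.json\n')

# One concept -> one token pattern, keyed the way Example 9 expects
# (underscores separate the words of multi-token concepts).
patterns = {'revolving_door': [{'LEMMA': 'revolving'}, {'LEMMA': 'door'}]}
with open('matcher_patterns.json', 'w', encoding='utf8') as f:
    json.dump(patterns, f)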
Example 8: extract_money
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def extract_money(self, doc: spacy.tokens.Doc) -> Tuple[List, Tuple[float, float]]:
    """Extract money entities and money-related tokens from `doc`.

    Parameters:
        doc: a list of tokens with corresponding tags, lemmas, etc.

    Returns:
        doc_no_money: doc with all money-related tokens removed.
        money_range: money range from `money_range[0]` to `money_range[1]` extracted from the doc.
    """
    matches = self.matcher(doc)
    money_range: Tuple = ()
    doc_no_money = list(doc)
    negated = False
    for match_id, start, end in matches:
        string_id = self.model.vocab.strings[match_id]
        span = doc[start:end]
        # Check whether the matched trigger word is negated ("not below ...").
        for child in doc[start].children:
            if child.dep_ == 'neg':
                negated = True
        num_token = [token for token in span if token.like_num]
        if (string_id == 'below' and not negated) or (string_id == 'above' and negated):
            money_range = (0, float(num_token[0].text))
        if (string_id == 'above' and not negated) or (string_id == 'below' and negated):
            money_range = (float(num_token[0].text), float(math.inf))
        del doc_no_money[start:end + 1]
    return doc_no_money, money_range
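Assuming this method shares a class with Example 4's `__init__` (the name `MoneyExtractor` below is hypothetical), a call might look like the sketch that follows. Note that the patterns key on `ENT_TYPE: 'MONEY'` and the negation check reads `token.dep_`, so in practice the pipeline needs NER and the parser enabled rather than Example 4's default `disable=['parser', 'ner']`:

ext = MoneyExtractor('en_core_web_sm', disable=[])  # hypothetical class, full pipeline
doc = ext.model('show me phones below $300')
doc_no_money, money_range = ext.extract_money(doc)
# money_range: e.g. (0, 300.0) when the 'below' rule fires,
# or the empty tuple () if no money pattern matched.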
Example 9: ground_mentioned_concepts
# Required import: import spacy [as alias]
# Or: from spacy import matcher [as alias]
def ground_mentioned_concepts(nlp, matcher, s, ans=""):
    s = s.lower()
    doc = nlp(s)
    matches = matcher(doc)
    mentioned_concepts = set()
    span_to_concepts = {}
    for match_id, start, end in matches:
        span = doc[start:end].text  # the matched span
        # Skip spans that share words with the answer.
        if len(set(span.split(" ")).intersection(set(ans.split(" ")))) > 0:
            continue
        original_concept = nlp.vocab.strings[match_id]
        # print("Matched '" + span + "' to the rule '" + original_concept)
        if len(original_concept.split("_")) == 1:
            original_concept = list(lemmatize(nlp, original_concept))[0]
        if span not in span_to_concepts:
            span_to_concepts[span] = set()
        span_to_concepts[span].add(original_concept)
    for span, concepts in span_to_concepts.items():
        concepts_sorted = list(concepts)
        concepts_sorted.sort(key=len)
        # mentioned_concepts.update(concepts_sorted[0:2])
        shortest = concepts_sorted[0:3]
        for c in shortest:
            if c in blacklist:
                continue
            # Prefer a lemmatized form when it coincides with another short candidate.
            lcs = lemmatize(nlp, c)
            intersect = lcs.intersection(shortest)
            if len(intersect) > 0:
                mentioned_concepts.add(list(intersect)[0])
            else:
                mentioned_concepts.add(c)
    return mentioned_concepts
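End to end, Examples 3, 7, and 9 compose roughly as follows (`lemmatize`, `hard_ground`, and `blacklist` are module-level helpers not reproduced on this page; the concept names returned depend entirely on the pattern file):

import spacy

nlp = spacy.load('en_core_web_sm', disable=['ner', 'parser'])
matcher = load_matcher(nlp)  # Example 7
concepts = ground_mentioned_concepts(
    nlp, matcher, 'A revolving door is convenient for two direction travel.')
print(concepts)  # e.g. {'revolving_door', ...}, depending on the pattern file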