本文整理匯總了Python中spacy.matcher.Matcher方法的典型用法代碼示例。如果您正苦於以下問題:Python matcher.Matcher方法的具體用法?Python matcher.Matcher怎麽用?Python matcher.Matcher使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類spacy.matcher
的用法示例。
在下文中一共展示了matcher.Matcher方法的14個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: __init__
# 需要導入模塊: from spacy import matcher [as 別名]
# 或者: from spacy.matcher import Matcher [as 別名]
def __init__(self, resume):
    """Parse a resume file and extract its basic details.

    :param resume: path to the resume document to parse
    """
    nlp = spacy.load('en_core_web_sm')
    self.__resume = resume
    self.__matcher = Matcher(nlp.vocab)
    # Every extractable field starts out unknown (None).
    self.__details = dict.fromkeys([
        'name', 'email', 'mobile_number', 'skills',
        'education', 'experience', 'competencies', 'measurable_results',
    ])
    raw_text = utils.extract_text(
        self.__resume, os.path.splitext(self.__resume)[1])
    self.__text_raw = raw_text
    # Collapse all whitespace runs into single spaces.
    self.__text = ' '.join(raw_text.split())
    self.__nlp = nlp(self.__text)
    self.__noun_chunks = list(self.__nlp.noun_chunks)
    self.__get_basic_details()
示例2: __init__
# 需要導入模塊: from spacy import matcher [as 別名]
# 或者: from spacy.matcher import Matcher [as 別名]
def __init__(self, spacy_pipeline):
    """Register a 'time_unit' entity label and a matcher for time-unit tokens.

    :param spacy_pipeline: loaded spaCy pipeline whose vocab backs the matcher
    """
    self.nlp = spacy_pipeline
    Token.set_extension('feature_is_time_unit', default=False)
    self.nlp.entity.add_label('time_unit')
    self.time_matcher = Matcher(self.nlp.vocab)
    # One case-insensitive single-token pattern per unit spelling
    # (spaCy v2 Matcher API: add(key, callback, *patterns)).
    spellings = (
        'sec', 'second', 'seconds',
        'min', 'minute', 'minutes',
        'hr', 'hour',
        'day', 'days', 'week', 'weeks',
        'month', 'months', 'year', 'years', 'yrs',
    )
    patterns = [[{'LOWER': spelling}] for spelling in spellings]
    self.time_matcher.add('UNIT_OF_TIME', None, *patterns)
示例3: __init__
# 需要導入模塊: from spacy import matcher [as 別名]
# 或者: from spacy.matcher import Matcher [as 別名]
def __init__(self, spacy_pipeline):
    """Register a 'frequency_indicator' label and a matcher for dosing frequencies.

    :param spacy_pipeline: loaded spaCy pipeline whose vocab backs the matcher
    """
    self.nlp = spacy_pipeline
    Token.set_extension('feature_is_frequency_indicator', default=False)
    self.nlp.entity.add_label('frequency_indicator')
    self.frequency_matcher = Matcher(self.nlp.vocab)
    # Single-token abbreviations followed by multi-word phrases;
    # each phrase becomes one case-insensitive token per word.
    terms = ('bid', 'prn', 'qid', 'tid', 'qd', 'daily', 'hs',
             'as needed', 'once a day', 'twice a day')
    patterns = [[{'LOWER': word} for word in term.split()] for term in terms]
    self.frequency_matcher.add('FREQUENCY_MATCHER', None, *patterns)
示例4: __init__
# 需要導入模塊: from spacy import matcher [as 別名]
# 或者: from spacy.matcher import Matcher [as 別名]
def __init__(self, spacy_model: str = 'en_core_web_sm', disable: Optional[Iterable[str]] = None, **kwargs):
    """Build a matcher for "below/above <amount>" price-range phrases.

    :param spacy_model: name of the spaCy model to load
    :param disable: pipeline components to disable; defaults to parser and NER
    """
    if disable is None:
        disable = ['parser', 'ner']
    self.model = _try_load_spacy_model(spacy_model, disable=disable)
    # Custom boolean token flags: True when the token text starts with a cue word.
    below_re = re.compile(r'below|cheap')
    above_re = re.compile(r'above|start')
    BELOW = self.model.vocab.add_flag(lambda text: bool(below_re.match(text)))
    ABOVE = self.model.vocab.add_flag(lambda text: bool(above_re.match(text)))
    self.matcher = Matcher(self.model.vocab)
    # Optional "than"/"from"/"$" filler, then a numeric MONEY token.
    tail = [{'LOWER': 'than', 'OP': '?'},
            {'LOWER': 'from', 'OP': '?'},
            {'ORTH': '$', 'OP': '?'},
            {'ENT_TYPE': 'MONEY', 'LIKE_NUM': True}]
    self.matcher.add('below', None, [{BELOW: True}] + tail)
    self.matcher.add('above', None, [{ABOVE: True}] + tail)
示例5: __init__
# 需要導入模塊: from spacy import matcher [as 別名]
# 或者: from spacy.matcher import Matcher [as 別名]
def __init__(self, nlp, patterns: list = None):
    """SpaCy pipe that matches entities from multiple pattern kinds.

    Supported pattern dicts, e.g.::

        patterns = [
            {'kind': 'phrase', 'value': 'amazon', 'entity': 'PRODUCT'},
            {'kind': 'regex', 'value': 'ama(.+)', 'entity': 'PRODUCT'}
        ]

    :param nlp: the spaCy NLP object
    :param patterns: the matcher patterns to register (may be None)
    """
    self.nlp = nlp
    self.matcher = Matcher(nlp.vocab)
    self.phrase_matcher = PhraseMatcher(nlp.vocab)
    self.extra_patterns = []
    # Register the initial pattern set (empty when none supplied).
    self.add_patterns(patterns=patterns or [])
示例6: __init__
# 需要導入模塊: from spacy import matcher [as 別名]
# 或者: from spacy.matcher import Matcher [as 別名]
def __init__(
        self,
        resume,
        skills_file=None,
        custom_regex=None
):
    """Parse a resume (path or in-memory BytesIO) and extract its details.

    :param resume: path to the resume file, or an io.BytesIO with a ``name``
    :param skills_file: optional custom skills list file
    :param custom_regex: optional custom regex for mobile-number extraction
    """
    nlp = spacy.load('en_core_web_sm')
    custom_nlp = spacy.load(os.path.dirname(os.path.abspath(__file__)))
    self.__skills_file = skills_file
    self.__custom_regex = custom_regex
    self.__matcher = Matcher(nlp.vocab)
    self.__details = {
        'name': None,
        'email': None,
        'mobile_number': None,
        'skills': None,
        'college_name': None,
        'degree': None,
        'designation': None,
        'experience': None,
        'company_names': None,
        'no_of_pages': None,
        'total_experience': None,
    }
    self.__resume = resume
    # Determine the extension WITH its leading dot. The original code took
    # the first dot-segment (`name.split('.')[1]`), which broke for names
    # containing extra dots (e.g. 'john.doe.pdf' -> '.doe') and raised
    # IndexError for extension-less paths; splitext/rsplit take the final
    # suffix instead.
    if not isinstance(self.__resume, io.BytesIO):
        ext = os.path.splitext(self.__resume)[1]
    else:
        ext = '.' + self.__resume.name.rsplit('.', 1)[-1]
    self.__text_raw = utils.extract_text(self.__resume, ext)
    # Collapse all whitespace runs into single spaces.
    self.__text = ' '.join(self.__text_raw.split())
    self.__nlp = nlp(self.__text)
    self.__custom_nlp = custom_nlp(self.__text_raw)
    self.__noun_chunks = list(self.__nlp.noun_chunks)
    self.__get_basic_details()
示例7: load_matcher
# 需要導入模塊: from spacy import matcher [as 別名]
# 或者: from spacy.matcher import Matcher [as 別名]
def load_matcher(nlp):
    """Build a Matcher populated from the pattern file named in paths.cfg.

    :param nlp: loaded spaCy pipeline whose vocab backs the matcher
    :return: a spacy.matcher.Matcher with one rule per concept
    """
    config = configparser.ConfigParser()
    config.read("paths.cfg")
    pattern_path = config["paths"]["matcher_patterns"]
    with open(pattern_path, "r", encoding="utf8") as handle:
        patterns_by_concept = json.load(handle)
    matcher = Matcher(nlp.vocab)
    progress = tqdm(patterns_by_concept.items(),
                    desc="Adding patterns to Matcher.")
    for concept, pattern in progress:
        matcher.add(concept, None, pattern)
    return matcher
示例8: __init__
# 需要導入模塊: from spacy import matcher [as 別名]
# 或者: from spacy.matcher import Matcher [as 別名]
def __init__(self, spacy_pipeline):
    """Register a 'measurement_unit' label and a matcher for compound units.

    :param spacy_pipeline: loaded spaCy pipeline whose vocab backs the matcher
    """
    self.nlp = spacy_pipeline
    Token.set_extension('feature_is_measurement_unit', default=False)
    self.nlp.entity.add_label('measurement_unit')
    self.unit_of_measurement_matcher = Matcher(self.nlp.vocab)
    # Compound units are "<numerator> / <denominator>" token triples,
    # keyed on the entity types assigned by the other unit components.
    ratios = (('mass_unit', 'volume_unit'),
              ('volume_unit', 'time_unit'),
              ('form_unit', 'volume_unit'))
    patterns = [[{'ENT_TYPE': num}, {'ORTH': '/'}, {'ENT_TYPE': den}]
                for num, den in ratios]
    self.unit_of_measurement_matcher.add('UNIT_OF_MEASUREMENT', None, *patterns)
示例9: __init__
# 需要導入模塊: from spacy import matcher [as 別名]
# 或者: from spacy.matcher import Matcher [as 別名]
def __init__(self, spacy_pipeline):
    """Register a 'volume_unit' label and a matcher for volume-unit tokens.

    :param spacy_pipeline: loaded spaCy pipeline whose vocab backs the matcher
    """
    self.nlp = spacy_pipeline
    Token.set_extension('feature_is_volume_unit', default=False)
    self.nlp.entity.add_label('volume_unit')
    self.volume_matcher = Matcher(self.nlp.vocab)
    # 'ml'/'cc' match case-insensitively; 'dL'/'L' require the exact casing.
    patterns = [[{'LOWER': 'ml'}],
                [{'ORTH': 'dL'}],
                [{'LOWER': 'cc'}],
                [{'ORTH': 'L'}]]
    self.volume_matcher.add('UNIT_OF_VOLUME', None, *patterns)
示例10: __init__
# 需要導入模塊: from spacy import matcher [as 別名]
# 或者: from spacy.matcher import Matcher [as 別名]
def __init__(self, spacy_pipeline):
    """Register a 'mass_unit' label and a matcher for mass-unit tokens.

    :param spacy_pipeline: loaded spaCy pipeline whose vocab backs the matcher
    """
    self.nlp = spacy_pipeline
    Token.set_extension('feature_is_mass_unit', default=False)
    self.nlp.entity.add_label('mass_unit')
    self.mass_matcher = Matcher(self.nlp.vocab)
    # (attribute, text) pairs; 'mg' and 'mEq' require exact casing (ORTH),
    # the rest match case-insensitively (LOWER).
    unit_specs = (('LOWER', 'mcg'),
                  ('LOWER', 'microgram'),
                  ('LOWER', 'micrograms'),
                  ('ORTH', 'mg'),
                  ('LOWER', 'milligram'),
                  ('LOWER', 'g'),
                  ('LOWER', 'kg'),
                  ('ORTH', 'mEq'))
    patterns = [[{attr: text}] for attr, text in unit_specs]
    self.mass_matcher.add('UNIT_OF_MASS', None, *patterns)
示例11: __init__
# 需要導入模塊: from spacy import matcher [as 別名]
# 或者: from spacy.matcher import Matcher [as 別名]
def __init__(self, vocab, boundary_protection_rules=None):
    """Build a Matcher holding sentence-boundary protection rules.

    :param vocab: spaCy Vocab backing the Matcher
    :param boundary_protection_rules: iterable of dicts with 'label' and
        'pattern' keys; defaults to no rules. The default is None rather
        than [] — the original mutable default list was shared across all
        calls, so a callee mutating it would leak state between instances.
    """
    self.matcher = Matcher(vocab)
    for rule in (boundary_protection_rules or []):
        self.matcher.add(rule["label"], None, rule["pattern"])
示例12: __init__
# 需要導入模塊: from spacy import matcher [as 別名]
# 或者: from spacy.matcher import Matcher [as 別名]
def __init__(self, nlp) -> None:
    """Set up matchers that pair a CASENAME with its CITATION token(s).

    :param nlp: loaded spaCy pipeline; a merge_entities pipe is appended to it
    """
    # NOTE(review): the mutable default ([]) is shared across Doc instances
    # until first assignment — confirm this is intended.
    Doc.set_extension("compound_cases", default=[], force=True)
    self.matcher = Matcher(nlp.vocab)
    # Casename immediately followed by one or more citations.
    name_then_citations = [{"ent_type": "CASENAME"},
                           {"ent_type": "CITATION", "OP": "+"}]
    # "<Casename> case <Citation>" phrasing.
    name_case_citation = [{"ent_type": "CASENAME"},
                          {"lower": "case"},
                          {"ent_type": "CITATION"}]
    self.matcher.add("compound_case", None,
                     name_then_citations, name_case_citation)
    self.global_matcher = Matcher(nlp.vocab)
    nlp.add_pipe(nlp.create_pipe("merge_entities"))
示例13: __init__
# 需要導入模塊: from spacy import matcher [as 別名]
# 或者: from spacy.matcher import Matcher [as 別名]
def __init__(self, nlp) -> None:
    """Set up abbreviation-detection extensions and matchers.

    :param nlp: loaded spaCy pipeline whose vocab backs the matchers
    """
    Doc.set_extension("abbreviations", default=[], force=True)
    Span.set_extension("long_form", default=None, force=True)
    self.matcher = Matcher(nlp.vocab)
    # Any parenthesised run of tokens: "(", one or more tokens, ")".
    paren_pattern = [{"ORTH": "("}, {"OP": "+"}, {"ORTH": ")"}]
    self.matcher.add("parenthesis", None, paren_pattern)
    self.global_matcher = Matcher(nlp.vocab)
示例14: add_pattern
# 需要導入模塊: from spacy import matcher [as 別名]
# 或者: from spacy.matcher import Matcher [as 別名]
def add_pattern(self, kind: str, value, entity: str):
    """Register one pattern with the matching backend selected by *kind*.

    Two backends are supported:
    - 'phrase': PhraseMatcher, as described in
      https://spacy.io/usage/linguistic-features#adding-phrase-patterns
    - 'regex': Matcher with a custom token flag, as described in
      https://spacy.io/usage/linguistic-features#regex

    :param kind: pattern matcher type, either 'phrase' or 'regex'
    :param value: entity pattern matcher (phrase text or regex source)
    :param entity: entity label to be matched
    """
    if kind == 'phrase':
        phrase_doc = self.nlp(value)
        self.phrase_matcher.add(entity, None, phrase_doc)
    elif kind == 'regex':
        # Per-pattern vocab flag: True for tokens whose text matches *value*.
        checker = lambda text: self.eval_regex(pattern=value, text=text)
        flag_id = self.nlp.vocab.add_flag(checker)
        self.matcher.add(entity, None, [{flag_id: True}])