当前位置: 首页>>代码示例>>Python>>正文


Python matcher.Matcher类代码示例

本文整理汇总了Python中spacy.matcher.Matcher类的典型用法代码示例。如果您正苦于以下问题:Python matcher.Matcher类的具体用法?Python matcher.Matcher怎么用?Python matcher.Matcher使用的例子?那么这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块spacy.matcher的用法示例。


在下文中一共展示了matcher.Matcher类的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from spacy import matcher [as 别名]
# 或者: from spacy.matcher import Matcher [as 别名]
def __init__(self, resume):
        """
        Load the spaCy model, read the resume text and call
        __get_basic_details().

        :param resume: Path of the resume file; its extension (taken with
                       os.path.splitext) is handed to utils.extract_text
                       together with the path
        """
        language_model = spacy.load('en_core_web_sm')
        self.__matcher = Matcher(language_model.vocab)
        # Every detail starts out as None.
        self.__details = dict.fromkeys((
            'name',
            'email',
            'mobile_number',
            'skills',
            'education',
            'experience',
            'competencies',
            'measurable_results',
        ))
        self.__resume = resume
        extension = os.path.splitext(self.__resume)[1]
        self.__text_raw = utils.extract_text(self.__resume, extension)
        # Collapse every run of whitespace into a single space.
        self.__text = ' '.join(self.__text_raw.split())
        self.__nlp = language_model(self.__text)
        self.__noun_chunks = list(self.__nlp.noun_chunks)
        self.__get_basic_details()
开发者ID:OmkarPathak,项目名称:ResumeParser,代码行数:21,代码来源:resume_parser.py

示例2: __init__

# 需要导入模块: from spacy import matcher [as 别名]
# 或者: from spacy.matcher import Matcher [as 别名]
def __init__(self, spacy_pipeline):
        """
        Register the 'feature_is_time_unit' token extension, add the
        'time_unit' entity label and build the time-unit matcher.

        :param spacy_pipeline: spaCy pipeline object; its `entity` component
                               and `vocab` are used here
        """
        self.nlp = spacy_pipeline
        Token.set_extension('feature_is_time_unit', default=False)
        self.nlp.entity.add_label('time_unit')
        self.time_matcher = Matcher(self.nlp.vocab)

        # Each word becomes its own single-token pattern on the LOWER attribute.
        # NOTE(review): 'hours' is not in this list although 'hour' is - confirm
        # whether that omission is intended.
        unit_words = (
            'sec', 'second', 'seconds',
            'min', 'minute', 'minutes',
            'hr', 'hour',
            'day', 'days',
            'week', 'weeks',
            'month', 'months',
            'year', 'years', 'yrs',
        )
        single_token_patterns = [[{'LOWER': word}] for word in unit_words]
        self.time_matcher.add('UNIT_OF_TIME', None, *single_token_patterns)
开发者ID:NLPatVCU,项目名称:medaCy,代码行数:27,代码来源:time_unit_component.py

示例3: __init__

# 需要导入模块: from spacy import matcher [as 别名]
# 或者: from spacy.matcher import Matcher [as 别名]
def __init__(self, spacy_pipeline):
        """
        Register the 'feature_is_frequency_indicator' token extension, add the
        'frequency_indicator' entity label and build the frequency matcher.

        :param spacy_pipeline: spaCy pipeline object; its `entity` component
                               and `vocab` are used here
        """
        self.nlp = spacy_pipeline
        Token.set_extension('feature_is_frequency_indicator', default=False)
        self.nlp.entity.add_label('frequency_indicator')
        self.frequency_matcher = Matcher(self.nlp.vocab)

        # One pattern per phrase; every word of a phrase is one token
        # compared on its LOWER attribute.
        phrases = (
            'bid', 'prn', 'qid', 'tid', 'qd', 'daily', 'hs',
            'as needed', 'once a day', 'twice a day',
        )
        phrase_patterns = [
            [{'LOWER': word} for word in phrase.split()]
            for phrase in phrases
        ]
        self.frequency_matcher.add('FREQUENCY_MATCHER', None, *phrase_patterns)
开发者ID:NLPatVCU,项目名称:medaCy,代码行数:20,代码来源:frequency_unit_component.py

示例4: __init__

# 需要导入模块: from spacy import matcher [as 别名]
# 或者: from spacy.matcher import Matcher [as 别名]
def __init__(self, spacy_model: str = 'en_core_web_sm', disable: Optional[Iterable[str]] = None, **kwargs):
        """
        Load the spaCy model and register the 'below' and 'above' matcher rules.

        :param spacy_model: name passed to _try_load_spacy_model
        :param disable: pipeline components to disable; ['parser', 'ner'] is
                        used when None is given
        :param kwargs: accepted but not used by this method
        """
        components_to_disable = ['parser', 'ner'] if disable is None else disable
        self.model = _try_load_spacy_model(spacy_model, disable=components_to_disable)
        vocab = self.model.vocab

        # Custom vocab flags: true when the token text starts with one of the
        # alternatives (re.match anchors at the beginning of the text).
        below_regex = re.compile(r'below|cheap')
        BELOW = vocab.add_flag(lambda text: bool(below_regex.match(text)))

        above_regex = re.compile(r'above|start')
        ABOVE = vocab.add_flag(lambda text: bool(above_regex.match(text)))

        self.matcher = Matcher(vocab)

        # Both rules share the same shape: flagged word, optional 'than',
        # optional 'from', optional '$', then a number-like MONEY token.
        for rule_name, flag_id in (('below', BELOW), ('above', ABOVE)):
            self.matcher.add(rule_name, None, [
                {flag_id: True},
                {'LOWER': 'than', 'OP': '?'},
                {'LOWER': 'from', 'OP': '?'},
                {'ORTH': '$', 'OP': '?'},
                {'ENT_TYPE': 'MONEY', 'LIKE_NUM': True},
            ])
开发者ID:deepmipt,项目名称:DeepPavlov,代码行数:23,代码来源:ecommerce_preprocess.py

示例5: __init__

# 需要导入模块: from spacy import matcher [as 别名]
# 或者: from spacy.matcher import Matcher [as 别名]
def __init__(self, nlp, patterns: list = None):
        """
        SpaCy pipe that matches entities based on multiple patterns.

        Pattern examples:
        patterns = [
            {'kind': 'phrase', 'value': 'amazon', 'entity': 'PRODUCT'},
            {'kind': 'regex', 'value': 'ama(.+)', 'entity': 'PRODUCT'}
        ]

        :param nlp: The NLP object
        :param patterns: The matcher patterns; an empty list is used when
                         nothing (or any falsy value) is given
        """
        self.nlp = nlp
        vocab = nlp.vocab
        self.phrase_matcher = PhraseMatcher(vocab)
        self.matcher = Matcher(vocab)

        self.extra_patterns = []
        # Register the initial patterns.
        initial_patterns = patterns if patterns else []
        self.add_patterns(patterns=initial_patterns)
开发者ID:kororo,项目名称:excelcy,代码行数:22,代码来源:pipe.py

示例6: __init__

# 需要导入模块: from spacy import matcher [as 别名]
# 或者: from spacy.matcher import Matcher [as 别名]
def __init__(
        self,
        resume,
        skills_file=None,
        custom_regex=None
    ):
        """
        Load the spaCy models, read the resume text and call
        __get_basic_details().

        :param resume: Path of the resume file, or an io.BytesIO whose `name`
                       attribute holds the file name
                       (NOTE(review): io.BytesIO has no `name` by default, so
                       callers presumably set it - confirm)
        :param skills_file: stored as-is for later use
        :param custom_regex: stored as-is for later use
        """
        nlp = spacy.load('en_core_web_sm')
        # The custom model is loaded from the directory containing this file.
        custom_nlp = spacy.load(os.path.dirname(os.path.abspath(__file__)))
        self.__skills_file = skills_file
        self.__custom_regex = custom_regex
        self.__matcher = Matcher(nlp.vocab)
        self.__details = {
            'name': None,
            'email': None,
            'mobile_number': None,
            'skills': None,
            'college_name': None,
            'degree': None,
            'designation': None,
            'experience': None,
            'company_names': None,
            'no_of_pages': None,
            'total_experience': None,
        }
        self.__resume = resume
        if isinstance(self.__resume, io.BytesIO):
            resume_name = self.__resume.name
        else:
            resume_name = self.__resume
        # splitext keeps only the LAST dot-suffix, so a name such as
        # "john.doe.pdf" yields ".pdf" (splitting on '.' and taking index 1
        # would yield "doe"). A name without any extension yields '' instead
        # of raising IndexError.
        extension = os.path.splitext(resume_name)[1]
        self.__text_raw = utils.extract_text(self.__resume, extension)
        # Collapse every run of whitespace into a single space.
        self.__text = ' '.join(self.__text_raw.split())
        self.__nlp = nlp(self.__text)
        # The custom model is fed the raw, non-normalized text.
        self.__custom_nlp = custom_nlp(self.__text_raw)
        self.__noun_chunks = list(self.__nlp.noun_chunks)
        self.__get_basic_details()
开发者ID:OmkarPathak,项目名称:pyresparser,代码行数:37,代码来源:resume_parser.py

示例7: load_matcher

# 需要导入模块: from spacy import matcher [as 别名]
# 或者: from spacy.matcher import Matcher [as 别名]
def load_matcher(nlp):
    """
    Build a Matcher from the JSON pattern file named in paths.cfg.

    The file path is read from the [paths] section, key "matcher_patterns".
    Each top-level JSON key is used as a rule name and its value as the
    pattern for that rule.

    :param nlp: object whose `vocab` is used to create the Matcher
    :return: the populated Matcher
    """
    cfg = configparser.ConfigParser()
    cfg.read("paths.cfg")
    patterns_path = cfg["paths"]["matcher_patterns"]
    with open(patterns_path, "r", encoding="utf8") as patterns_file:
        patterns_by_concept = json.load(patterns_file)

    concept_matcher = Matcher(nlp.vocab)
    progress = tqdm(patterns_by_concept.items(), desc="Adding patterns to Matcher.")
    for concept, pattern in progress:
        concept_matcher.add(concept, None, pattern)
    return concept_matcher
开发者ID:INK-USC,项目名称:KagNet,代码行数:12,代码来源:grounding_concepts.py

示例8: __init__

# 需要导入模块: from spacy import matcher [as 别名]
# 或者: from spacy.matcher import Matcher [as 别名]
def __init__(self, spacy_pipeline):
        """
        Register the 'feature_is_measurement_unit' token extension, add the
        'measurement_unit' entity label and build the matcher for
        "<unit> / <unit>" token sequences.

        :param spacy_pipeline: spaCy pipeline object; its `entity` component
                               and `vocab` are used here
        """
        self.nlp = spacy_pipeline
        Token.set_extension('feature_is_measurement_unit', default=False)
        self.nlp.entity.add_label('measurement_unit')
        self.unit_of_measurement_matcher = Matcher(self.nlp.vocab)

        # (entity type before the slash, entity type after the slash)
        unit_pairs = (
            ('mass_unit', 'volume_unit'),
            ('volume_unit', 'time_unit'),
            ('form_unit', 'volume_unit'),
        )
        ratio_patterns = [
            [{'ENT_TYPE': numerator}, {'ORTH': '/'}, {'ENT_TYPE': denominator}]
            for numerator, denominator in unit_pairs
        ]
        self.unit_of_measurement_matcher.add('UNIT_OF_MEASUREMENT', None, *ratio_patterns)
开发者ID:NLPatVCU,项目名称:medaCy,代码行数:13,代码来源:measurement_unit_component.py

示例9: __init__

# 需要导入模块: from spacy import matcher [as 别名]
# 或者: from spacy.matcher import Matcher [as 别名]
def __init__(self, spacy_pipeline):
        """
        Register the 'feature_is_volume_unit' token extension, add the
        'volume_unit' entity label and build the volume-unit matcher.

        :param spacy_pipeline: spaCy pipeline object; its `entity` component
                               and `vocab` are used here
        """
        self.nlp = spacy_pipeline
        Token.set_extension('feature_is_volume_unit', default=False)
        self.nlp.entity.add_label('volume_unit')
        self.volume_matcher = Matcher(self.nlp.vocab)

        # (token attribute, text) for each single-token pattern; 'dL' and 'L'
        # are compared on ORTH, the others on LOWER.
        token_specs = (
            ('LOWER', 'ml'),
            ('ORTH', 'dL'),
            ('LOWER', 'cc'),
            ('ORTH', 'L'),
        )
        volume_patterns = [[{attribute: text}] for attribute, text in token_specs]
        self.volume_matcher.add('UNIT_OF_VOLUME', None, *volume_patterns)
开发者ID:NLPatVCU,项目名称:medaCy,代码行数:13,代码来源:volume_unit_component.py

示例10: __init__

# 需要导入模块: from spacy import matcher [as 别名]
# 或者: from spacy.matcher import Matcher [as 别名]
def __init__(self, spacy_pipeline):
        """
        Register the 'feature_is_mass_unit' token extension, add the
        'mass_unit' entity label and build the mass-unit matcher.

        :param spacy_pipeline: spaCy pipeline object; its `entity` component
                               and `vocab` are used here
        """
        self.nlp = spacy_pipeline
        Token.set_extension('feature_is_mass_unit', default=False)
        self.nlp.entity.add_label('mass_unit')
        self.mass_matcher = Matcher(self.nlp.vocab)

        # (token attribute, text) for each single-token pattern; 'mg' and
        # 'mEq' are compared on ORTH, the others on LOWER.
        token_specs = (
            ('LOWER', 'mcg'),
            ('LOWER', 'microgram'),
            ('LOWER', 'micrograms'),
            ('ORTH', 'mg'),
            ('LOWER', 'milligram'),
            ('LOWER', 'g'),
            ('LOWER', 'kg'),
            ('ORTH', 'mEq'),
        )
        mass_patterns = [[{attribute: text}] for attribute, text in token_specs]
        self.mass_matcher.add('UNIT_OF_MASS', None, *mass_patterns)
开发者ID:NLPatVCU,项目名称:medaCy,代码行数:17,代码来源:mass_unit_component.py

示例11: __init__

# 需要导入模块: from spacy import matcher [as 别名]
# 或者: from spacy.matcher import Matcher [as 别名]
def __init__(self, vocab, boundary_protection_rules=None):
        """
        Build a Matcher holding one rule per boundary protection rule.

        :param vocab: vocabulary passed to the Matcher constructor
        :param boundary_protection_rules: iterable of mappings, each with a
            "label" (rule name) and a "pattern" entry; None (the default)
            means no rules
        """
        # A None sentinel replaces the former mutable `[]` default, which
        # would be a single list object shared by every call.
        if boundary_protection_rules is None:
            boundary_protection_rules = []
        self.matcher = Matcher(vocab)
        for rule in boundary_protection_rules:
            self.matcher.add(rule["label"], None, rule["pattern"])
开发者ID:ICLRandD,项目名称:Blackstone,代码行数:6,代码来源:sentence_segmenter.py

示例12: __init__

# 需要导入模块: from spacy import matcher [as 别名]
# 或者: from spacy.matcher import Matcher [as 别名]
def __init__(self, nlp) -> None:
        """
        Register the "compound_cases" Doc extension, build the compound-case
        matcher and append a "merge_entities" pipe to `nlp`.

        :param nlp: pipeline object; its `vocab` is used for the matchers and
                    the "merge_entities" pipe is added to it
        """
        Doc.set_extension("compound_cases", default=[], force=True)
        vocab = nlp.vocab
        self.matcher = Matcher(vocab)
        self.matcher.add(
            "compound_case",
            None,
            # CASENAME followed by one or more CITATION tokens
            [{"ent_type": "CASENAME"}, {"ent_type": "CITATION", "OP": "+"}],
            # CASENAME, the word "case", then a CITATION
            [{"ent_type": "CASENAME"}, {"lower": "case"}, {"ent_type": "CITATION"}],
        )
        self.global_matcher = Matcher(vocab)
        nlp.add_pipe(nlp.create_pipe("merge_entities"))
开发者ID:ICLRandD,项目名称:Blackstone,代码行数:15,代码来源:compound_cases.py

示例13: __init__

# 需要导入模块: from spacy import matcher [as 别名]
# 或者: from spacy.matcher import Matcher [as 别名]
def __init__(self, nlp) -> None:
        """
        Register the "abbreviations" Doc extension and the "long_form" Span
        extension, and build the parenthesis matcher.

        :param nlp: pipeline object whose `vocab` is used for both matchers
        """
        Doc.set_extension("abbreviations", default=[], force=True)
        Span.set_extension("long_form", default=None, force=True)

        vocab = nlp.vocab
        # "(", one or more tokens of any kind, then ")"
        parenthesized = [{"ORTH": "("}, {"OP": "+"}, {"ORTH": ")"}]
        self.matcher = Matcher(vocab)
        self.matcher.add("parenthesis", None, parenthesized)
        self.global_matcher = Matcher(vocab)
开发者ID:ICLRandD,项目名称:Blackstone,代码行数:11,代码来源:abbreviations.py

示例14: add_pattern

# 需要导入模块: from spacy import matcher [as 别名]
# 或者: from spacy.matcher import Matcher [as 别名]
def add_pattern(self, kind: str, value, entity: str):
        """
        Add a pattern to one of the matchers. Two kinds are handled:
        - phrase: uses PhraseMatcher, described in https://spacy.io/usage/linguistic-features#adding-phrase-patterns
        - regex: uses Matcher, described in https://spacy.io/usage/linguistic-features#regex

        Any other kind is ignored.

        :param kind: Pattern matcher type, either 'phrase' or 'regex'
        :param value: Entity pattern matcher
        :param entity: Entity to be matched
        """
        if kind == 'phrase':
            phrase_doc = self.nlp(value)
            self.phrase_matcher.add(entity, None, phrase_doc)
            return

        if kind == 'regex':
            def matches_value(text):
                # Delegate the decision to eval_regex with this pattern.
                return self.eval_regex(pattern=value, text=text)

            regex_flag = self.nlp.vocab.add_flag(matches_value)
            self.matcher.add(entity, None, [{regex_flag: True}])
开发者ID:kororo,项目名称:excelcy,代码行数:17,代码来源:pipe.py


注:本文中的spacy.matcher.Matcher方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。