当前位置: 首页>>代码示例>>Python>>正文


Python pymorphy2.MorphAnalyzer类代码示例

本文整理汇总了Python中pymorphy2.MorphAnalyzer类的典型用法代码示例。如果您正苦于以下问题：Python pymorphy2.MorphAnalyzer类的具体用法？Python pymorphy2.MorphAnalyzer怎么用？Python pymorphy2.MorphAnalyzer使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块pymorphy2的用法示例。


在下文中一共展示了pymorphy2.MorphAnalyzer类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, language: str,
                 file_names: List[str],
                 config: TrainConfig,
                 grammeme_vectorizer_input: GrammemeVectorizer,
                 grammeme_vectorizer_output: GrammemeVectorizer,
                 indices: np.array,
                 word_vocabulary: WordVocabulary,
                 char_set: str,
                 build_config: BuildModelConfig):
        """Store the data sources, batching settings and vocabularies.

        :param language: language code; only "ru" enables the pymorphy2
            analyzer and the OpenCorpora -> UD 1.4 tag converter.
        :param file_names: input file names, stored as-is.
        :param config: supplies ``external_batch_size`` and
            ``sentence_len_groups``.
        :param grammeme_vectorizer_input: prepared input grammeme vectorizer.
        :param grammeme_vectorizer_output: prepared output grammeme vectorizer.
        :param indices: sample indices, stored as-is.
        :param word_vocabulary: prepared word vocabulary.
        :param char_set: character set, stored as-is.
        :param build_config: model build configuration, stored as-is.
        """
        self.language = language
        self.file_names = file_names  # type: List[str]

        # Batch parameters.
        self.batch_size = config.external_batch_size  # type: int
        self.bucket_borders = config.sentence_len_groups  # type: List[Tuple[int]]
        # One empty bucket per sentence-length group.
        self.buckets = [[] for _ in self.bucket_borders]

        self.build_config = build_config
        self.word_vocabulary = word_vocabulary
        self.char_set = char_set

        # Split into subsets.
        self.indices = indices  # type: np.array

        # Prepared vocabularies.
        self.grammeme_vectorizer_input = grammeme_vectorizer_input  # type: GrammemeVectorizer
        self.grammeme_vectorizer_output = grammeme_vectorizer_output  # type: GrammemeVectorizer

        # The morphological analyzer and tag converter exist only for Russian.
        if self.language == "ru":
            self.morph = MorphAnalyzer()  # type: MorphAnalyzer
            self.converter = converters.converter('opencorpora-int', 'ud14')
        else:
            self.morph = None
            self.converter = None
开发者ID:IlyaGusev,项目名称:rnnmorph,代码行数:27,代码来源:batch_generator.py

示例2: __init__

# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, data_path: Union[Path, str], *args, **kwargs) -> None:
        """Load the obscenity word lists and compile the matching patterns.

        Args:
            data_path: directory containing ``obscenity_words.json`` and
                ``obscenity_words_exception.json``. If
                ``obscenity_words_extended.json`` is also present, its words
                are merged into ``self.obscenity_words``.
            *args: not used by this initializer.
            **kwargs: not used by this initializer.
        """
        log.info(f"Initializing `{self.__class__.__name__}`")

        data_path = expand_path(data_path)

        def read_word_set(file_name):
            # Parse one UTF-8 JSON file and turn its content into a set.
            with open(data_path / file_name, encoding="utf-8") as f:
                return set(json.load(f))

        self.obscenity_words = read_word_set('obscenity_words.json')
        self.obscenity_words_exception = read_word_set('obscenity_words_exception.json')

        # The extended list is optional; the attribute is only set when the file exists.
        extended_path = data_path / 'obscenity_words_extended.json'
        if extended_path.exists():
            self.obscenity_words_extended = read_word_set('obscenity_words_extended.json')
            self.obscenity_words.update(self.obscenity_words_extended)

        first_pattern, second_pattern = self._get_patterns()
        flags = re.U | re.I
        self.regexp = re.compile(first_pattern, flags)
        self.regexp2 = re.compile(second_pattern, flags)
        self.morph = pymorphy2.MorphAnalyzer()
        # Runs of Cyrillic letters.
        self.word_pattern = re.compile(r'[А-яЁё]+')
开发者ID:deepmipt,项目名称:DeepPavlov,代码行数:20,代码来源:ru_obscenity_classifier.py

示例3: __init__

# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self,
                 model_path=None,
                 tokenizer=None,
                 model_url='http://lnsigo.mipt.ru/export/models/ner/ner_model_total_rus.tar.gz'):
        """Prepare the NER network, tokenizer and morphological analyzer.

        :param model_path: directory with the model files; a falsy value
            selects the ``../model`` resource next to this module.
        :param tokenizer: tokenizer to use; a falsy value creates a new
            ``Tokenizer()``.
        :param model_url: stored on the instance before ``_lazy_download()``
            is called.
        """
        if not model_path:
            model_path = pkg_resources.resource_filename(__name__, "../model")
        self.model_path = model_path
        self.model_url = model_url
        self._lazy_download()

        # Network hyper-parameters are read from params.json in the model directory.
        params_path = self._get_path('params.json')
        with open(params_path) as f:
            self.network_params = json.load(f)

        dicts_path = self._get_path('dict.txt')
        self.corpus = Corpus(dicts_filepath=dicts_path)
        # NOTE: the ``verbouse`` keyword is spelled exactly as in the original call.
        self.network = NER(
            self.corpus,
            verbouse=False,
            pretrained_model_filepath=self._get_path('ner_model'),
            **self.network_params,
        )

        if not tokenizer:
            tokenizer = Tokenizer()
        self.tokenizer = tokenizer
        self._morph = pymorphy2.MorphAnalyzer()
开发者ID:deepmipt,项目名称:ner,代码行数:26,代码来源:extractor.py

示例4: __init__

# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self):
		"""Set up the grammeme tables, grammar, dictionary and pymorphy2 analyzer."""
		# Part-of-speech tags.
		self.pos = [
			"NOUN", "VERB", "ADJF", "ADJS", "COMP",
			"INFN", "PRTF", "PRTS", "GRND", "NUMR", "ADVB",
			"NPRO", "PREP", "PRED", "CONJ", "PRCL", "INTJ", "QUES",
		]
		# Transitivity.
		self.valency = ["tran", "intr"]
		# Number.
		self.numb = ["sing", "plur"]
		# Gender.
		# NOTE(review): pymorphy2/OpenCorpora spells masculine "masc"; confirm
		# whether "musc" is intentional (e.g. matches the grammar data) before changing it.
		self.gend = ["musc", "femn", "neut"]
		# Person.
		self.per = ["1per", "2per", "3per"]
		# Tense.
		self.tense = ["past", "pres", "futr"]
		# Cases.
		self.cases = [
			"nomn", "gent", "datv", "accs", "acc2", "gen1",
			"gen2", "ablt", "loct", "voct", "loc1", "loc2",
		]

		# `G` and `d` are names defined outside this method.
		self.grammar = G
		self.dict = d
		self.morph = pymorphy2.MorphAnalyzer()
	

	# use pymorphy2 to define set of grammemes 
开发者ID:constantin50,项目名称:syntax_analyzer,代码行数:19,代码来源:analyzer.py

示例5: __init__

# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self):
        """Create the tokenizer, POS tagger and morphological analyzer, in that order."""
        tokenizer = Tokenizer()
        self.tokenizer = tokenizer
        tagger = POSTagger()
        self.tagger = tagger
        analyzer = MorphAnalyzer()
        self.morph = analyzer
开发者ID:bureaucratic-labs,项目名称:dostoevsky,代码行数:6,代码来源:tokenization.py

示例6: __init__

# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, raw=None):
        """Wrap an analyzer.

        :param raw: analyzer to wrap; any falsy value is replaced by a new
            ``PymorphyAnalyzer()``.
        """
        self.raw = raw or PymorphyAnalyzer()
开发者ID:natasha,项目名称:yargy,代码行数:6,代码来源:morph.py

示例7: get_aspect_matrix

# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def get_aspect_matrix(self, n_clusters=0):
        """Build the L2-normalised aspect matrix.

        K-means centroids of ``self.emb_matrix`` form the first rows. If
        ``seed_words.txt`` exists in ``self.data_path``, each of its lines adds
        one more row: the mean embedding of the line's lemmatised words that
        are present in ``self.embeddings``, or a random vector of size
        ``self.emb_dim`` when none of them is known.

        ``self.aspect_size`` is set to ``n_clusters`` plus the number of seed
        rows that were added.

        :param n_clusters: number of k-means clusters to fit.
        :return: array with one L2-normalised row per aspect.
        """
        seed_words_path = os.path.join(self.data_path, "seed_words.txt")
        self.aspect_size = n_clusters
        km = KMeans(n_clusters=n_clusters)
        km.fit(self.emb_matrix)
        aspects = km.cluster_centers_

        if os.path.exists(seed_words_path):
            seed_aspects = []
            morph = pymorphy2.MorphAnalyzer()
            # Read explicitly as UTF-8 so the result does not depend on the
            # platform's locale encoding.
            with open(seed_words_path, encoding="utf-8") as f:
                for line in f:
                    vectors = []
                    # findall yields only non-empty word tokens, so the empty
                    # strings re.split produced at line edges never reach the
                    # analyzer. The pattern is a raw string to avoid the
                    # invalid "\W" escape warning.
                    for word in re.findall(r'\w+', line.lower()):
                        lemma = morph.parse(word)[0].normal_form
                        if lemma in self.embeddings:
                            vectors.append(self.embeddings[lemma])
                    if vectors:
                        seed_aspects.append(np.mean(vectors, axis=0))
                    else:
                        print("Not initialized:\t" + line)
                        seed_aspects.append(np.random.random((self.emb_dim,)))
            # An existing but empty seed file contributes nothing; np.stack
            # would raise on an empty list.
            if seed_aspects:
                self.aspect_size += len(seed_aspects)
                aspects = np.concatenate((aspects, np.stack(seed_aspects)), axis=0)

        # L2 normalization
        return aspects / np.linalg.norm(aspects, axis=-1, keepdims=True)
开发者ID:madrugado,项目名称:Attention-Based-Aspect-Extraction,代码行数:31,代码来源:w2vEmbReader.py

示例8: __init__

# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self):
        """Create the pymorphy2 analyzer used by this instance."""
        analyzer = pymorphy2.MorphAnalyzer()
        self.morph = analyzer
开发者ID:igor-shevchenko,项目名称:rutermextract,代码行数:4,代码来源:parser.py

示例9: __init__

# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self,
                 language="ru",
                 eval_model_config_path: str=None,
                 eval_model_weights_path: str=None,
                 gram_dict_input: str=None,
                 gram_dict_output: str=None,
                 word_vocabulary: str=None,
                 char_set_path: str=None,
                 build_config: str=None):
        """Load the evaluation model and language resources.

        Every path argument left as ``None`` is taken from
        ``MODELS_PATHS[language]``.

        :param language: language code; "ru" enables the pymorphy2 analyzer
            and the tag converter, "en" triggers the NLTK resource downloads.
        :param eval_model_config_path: path to the evaluation model config.
        :param eval_model_weights_path: path to the evaluation model weights.
        :param gram_dict_input: path to the input grammeme dictionary.
        :param gram_dict_output: path to the output grammeme dictionary.
        :param word_vocabulary: path to the word vocabulary.
        :param char_set_path: path to the character set.
        :param build_config: path to the build configuration.
        """
        def resolve(value, key):
            # MODELS_PATHS is consulted only for arguments left as None.
            return MODELS_PATHS[language][key] if value is None else value

        eval_model_config_path = resolve(eval_model_config_path, "eval_model_config")
        eval_model_weights_path = resolve(eval_model_weights_path, "eval_model_weights")
        gram_dict_input = resolve(gram_dict_input, "gram_input")
        gram_dict_output = resolve(gram_dict_output, "gram_output")
        word_vocabulary = resolve(word_vocabulary, "word_vocabulary")
        char_set_path = resolve(char_set_path, "char_set")
        build_config = resolve(build_config, "build_config")

        self.language = language
        if language == "ru":
            self.converter = converters.converter('opencorpora-int', 'ud14')
            self.morph = MorphAnalyzer()
        else:
            self.converter = None
            self.morph = None

        if self.language == "en":
            # NLTK resources fetched for English.
            for resource in ("wordnet", 'averaged_perceptron_tagger', 'universal_tagset'):
                nltk.download(resource)

        self.build_config = BuildModelConfig()
        self.build_config.load(build_config)

        model = LSTMMorphoAnalysis(language=language)
        self.model = model
        model.prepare(gram_dict_input, gram_dict_output, word_vocabulary, char_set_path)
        model.load_eval(self.build_config, eval_model_config_path, eval_model_weights_path)
开发者ID:IlyaGusev,项目名称:rnnmorph,代码行数:40,代码来源:predictor.py

示例10: __init__

# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, language: str):
        """Create an empty model holder for the given language.

        :param language: language code; only "ru" enables the pymorphy2
            analyzer and the OpenCorpora -> UD 1.4 tag converter.
        """
        self.language = language  # type: str

        # The morphological analyzer and tag converter exist only for Russian.
        if language == "ru":
            self.morph = MorphAnalyzer()  # type: MorphAnalyzer
            self.converter = converters.converter('opencorpora-int', 'ud14')
        else:
            self.morph = None
            self.converter = None

        # Vectorizers and vocabularies start out empty.
        self.grammeme_vectorizer_input = GrammemeVectorizer()  # type: GrammemeVectorizer
        self.grammeme_vectorizer_output = GrammemeVectorizer()  # type: GrammemeVectorizer
        self.word_vocabulary = WordVocabulary()  # type: WordVocabulary
        self.char_set = ""  # type: str

        # No models are attached yet.
        self.train_model = None  # type: Model
        self.eval_model = None  # type: Model
开发者ID:IlyaGusev,项目名称:rnnmorph,代码行数:12,代码来源:model.py

示例11: __init__

# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, language: str):
        """Create a loader with empty vectorizers, vocabulary and character set.

        :param language: language code; only "ru" enables the pymorphy2
            analyzer and the OpenCorpora -> UD 1.4 tag converter.
        """
        self.language = language

        # Containers start out empty.
        self.grammeme_vectorizer_input = GrammemeVectorizer()  # type: GrammemeVectorizer
        self.grammeme_vectorizer_output = GrammemeVectorizer()  # type: GrammemeVectorizer
        self.word_vocabulary = WordVocabulary()  # type: WordVocabulary
        self.char_set = set()

        # The morphological analyzer and tag converter exist only for Russian.
        if self.language == "ru":
            self.morph = MorphAnalyzer()  # type: MorphAnalyzer
            self.converter = converters.converter('opencorpora-int', 'ud14')
        else:
            self.morph = None
            self.converter = None
开发者ID:IlyaGusev,项目名称:rnnmorph,代码行数:10,代码来源:loader.py

示例12: __init__

# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, stopwords: Optional[List[str]] = None, ngram_range: List[int] = None,
                 lemmas: bool = False, lowercase: Optional[bool] = None,
                 alphas_only: Optional[bool] = None, **kwargs):
        """Configure the tokenizer.

        Args:
            stopwords: stop-word list; a falsy value becomes an empty list.
            ngram_range: n-gram range, stored as a tuple; ``None`` means
                ``[1, 1]``.
            lemmas: stored as-is on the instance.
            lowercase: stored as-is on the instance.
            alphas_only: stored as-is on the instance.
            **kwargs: not used by this initializer.
        """
        self.stopwords = stopwords if stopwords else []
        self.tokenizer = ToktokTokenizer()
        self.lemmatizer = pymorphy2.MorphAnalyzer()
        # cast JSON array to tuple
        self.ngram_range = tuple([1, 1] if ngram_range is None else ngram_range)
        self.lemmas = lemmas
        self.lowercase = lowercase
        self.alphas_only = alphas_only
        # Starts out as an empty dict.
        self.tok2morph = {}
开发者ID:deepmipt,项目名称:DeepPavlov,代码行数:16,代码来源:ru_tokenizer.py

示例13: __init__

# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, save_path: str, load_path: str, max_pymorphy_variants: int = -1, **kwargs) -> None:
        """Initialise the base class, load stored data and create the helpers.

        Args:
            save_path: forwarded to the base-class initializer.
            load_path: forwarded to the base-class initializer.
            max_pymorphy_variants: stored as-is on the instance.
            **kwargs: forwarded to the base-class initializer.
        """
        super().__init__(save_path, load_path, **kwargs)
        self.max_pymorphy_variants = max_pymorphy_variants
        # NOTE: load() runs before the memo dicts, analyzer and converter below are assigned.
        self.load()
        self.memorized_word_indexes = {}
        self.memorized_tag_indexes = {}
        analyzer = MorphAnalyzer()
        self.analyzer = analyzer
        self.converter = converters.converter('opencorpora-int', 'ud20')
开发者ID:deepmipt,项目名称:DeepPavlov,代码行数:10,代码来源:word_vectorizer.py

示例14: __init__

# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, *args, **kwargs):
        """Create the pymorphy2 analyzer; ``*args`` and ``**kwargs`` are not used here."""
        analyzer = pymorphy2.MorphAnalyzer()
        self.lemmatizer = analyzer
开发者ID:deepmipt,项目名称:DeepPavlov,代码行数:4,代码来源:russian_lemmatizer.py

示例15: __init__

# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, save_path: Optional[str] = None, load_path: Optional[str] = None,
                 transform_lemmas=False, **kwargs) -> None:
        """Set up the lemmatizer state, then initialise the base class.

        Args:
            save_path: forwarded to the base-class initializer.
            load_path: forwarded to the base-class initializer.
            transform_lemmas: stored as-is on the instance.
            **kwargs: forwarded to the base-class initializer.
        """
        self.transform_lemmas = transform_lemmas
        self._reset()
        analyzer = MorphAnalyzer()
        self.analyzer = analyzer
        tag_converter = converters.converter("opencorpora-int", "ud20")
        self.converter = tag_converter
        # NOTE: the base-class initializer runs last, after the attributes above are set.
        super().__init__(save_path, load_path, **kwargs)
开发者ID:deepmipt,项目名称:DeepPavlov,代码行数:9,代码来源:lemmatizer.py


注：本文中的pymorphy2.MorphAnalyzer类示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。