本文整理汇总了Python中pymorphy2.MorphAnalyzer方法的典型用法代码示例。如果您正苦于以下问题：Python pymorphy2.MorphAnalyzer方法的具体用法？Python pymorphy2.MorphAnalyzer怎么用？Python pymorphy2.MorphAnalyzer使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pymorphy2的用法示例。
在下文中一共展示了pymorphy2.MorphAnalyzer方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, language: str,
             file_names: List[str],
             config: TrainConfig,
             grammeme_vectorizer_input: GrammemeVectorizer,
             grammeme_vectorizer_output: GrammemeVectorizer,
             indices: np.array,
             word_vocabulary: WordVocabulary,
             char_set: str,
             build_config: BuildModelConfig):
    """Store the batching settings, vocabularies and morphology helpers.

    :param language: language code; the pymorphy2 analyzer and the tag
        converter are only created when it equals ``"ru"``.
    :param file_names: stored as-is on the instance.
    :param config: source of ``external_batch_size`` and
        ``sentence_len_groups``.
    :param grammeme_vectorizer_input: stored as-is on the instance.
    :param grammeme_vectorizer_output: stored as-is on the instance.
    :param indices: stored as-is on the instance.
    :param word_vocabulary: stored as-is on the instance.
    :param char_set: stored as-is on the instance.
    :param build_config: stored as-is on the instance.
    """
    self.language = language
    self.file_names = file_names  # type: List[str]
    self.build_config = build_config
    self.word_vocabulary = word_vocabulary
    self.char_set = char_set

    # Batch parameters.
    self.batch_size = config.external_batch_size  # type: int
    self.bucket_borders = config.sentence_len_groups  # type: List[Tuple[int]]
    # One empty bucket per sentence-length group.
    self.buckets = [[] for _ in self.bucket_borders]

    # Split into samples.
    self.indices = indices  # type: np.array

    # Prepared dictionaries.
    self.grammeme_vectorizer_input = grammeme_vectorizer_input  # type: GrammemeVectorizer
    self.grammeme_vectorizer_output = grammeme_vectorizer_output  # type: GrammemeVectorizer

    # Both helpers stay None for every language other than Russian.
    is_russian = self.language == "ru"
    self.morph = MorphAnalyzer() if is_russian else None  # type: MorphAnalyzer
    self.converter = converters.converter('opencorpora-int', 'ud14') if is_russian else None
示例2: __init__
# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, data_path: Union[Path, str], *args, **kwargs) -> None:
    """Load the obscenity word lists from ``data_path`` and build the matchers.

    :param data_path: directory holding ``obscenity_words.json`` and
        ``obscenity_words_exception.json``; ``obscenity_words_extended.json``
        is read too when it exists, and its entries are merged into
        ``self.obscenity_words``.
    :param args: accepted and ignored.
    :param kwargs: accepted and ignored.
    """
    log.info(f"Initializing `{self.__class__.__name__}`")
    data_path = expand_path(data_path)

    with open(data_path / 'obscenity_words.json', encoding="utf-8") as words_file:
        self.obscenity_words = set(json.load(words_file))

    with open(data_path / 'obscenity_words_exception.json', encoding="utf-8") as exceptions_file:
        self.obscenity_words_exception = set(json.load(exceptions_file))

    # The extended list is optional; the attribute is only set when the file exists.
    extended_path = data_path / 'obscenity_words_extended.json'
    if extended_path.exists():
        with open(extended_path, encoding="utf-8") as extended_file:
            self.obscenity_words_extended = set(json.load(extended_file))
        self.obscenity_words.update(self.obscenity_words_extended)

    first_pattern, second_pattern = self._get_patterns()
    regex_flags = re.U | re.I
    self.regexp = re.compile(first_pattern, regex_flags)
    self.regexp2 = re.compile(second_pattern, regex_flags)

    self.morph = pymorphy2.MorphAnalyzer()
    self.word_pattern = re.compile(r'[А-яЁё]+')
示例3: __init__
# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self,
             model_path=None,
             tokenizer=None,
             model_url='http://lnsigo.mipt.ru/export/models/ner/ner_model_total_rus.tar.gz'):
    """Locate the model files, then build the corpus, network and helpers.

    :param model_path: directory with the model files; when falsy, the
        ``../model`` resource next to this package is used instead.
    :param tokenizer: tokenizer to use; when falsy, a default ``Tokenizer()``
        is created.
    :param model_url: stored on the instance before ``_lazy_download()`` runs.
    """
    self.model_path = (
        model_path
        or pkg_resources.resource_filename(__name__, "../model")
    )
    self.model_url = model_url
    self._lazy_download()

    # Decode the JSON parameters as UTF-8 explicitly so the result does not
    # depend on the platform's locale encoding.
    with open(self._get_path('params.json'), encoding='utf-8') as f:
        self.network_params = json.load(f)

    self.corpus = Corpus(dicts_filepath=self._get_path('dict.txt'))
    self.network = NER(
        self.corpus,
        verbouse=False,  # keyword spelled as in the original call
        pretrained_model_filepath=self._get_path('ner_model'),
        **self.network_params,
    )

    self.tokenizer = tokenizer or Tokenizer()
    self._morph = pymorphy2.MorphAnalyzer()
示例4: __init__
# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self):
    """Set up the grammeme tables, the grammar/dictionary and the analyzer."""
    # Part-of-speech tags.
    self.pos = [
        "NOUN", "VERB", "ADJF", "ADJS", "COMP", "INFN",
        "PRTF", "PRTS", "GRND", "NUMR", "ADVB", "NPRO",
        "PREP", "PRED", "CONJ", "PRCL", "INTJ", "QUES",
    ]
    self.valency = ["tran", "intr"]
    self.numb = ["sing", "plur"]
    # NOTE(review): pymorphy2 spells the masculine grammeme "masc"; "musc"
    # looks like a typo, but it is kept as-is because other project data may
    # rely on this spelling -- confirm before changing.
    self.gend = ["musc", "femn", "neut"]
    self.per = ["1per", "2per", "3per"]
    self.tense = ["past", "pres", "futr"]
    self.cases = [
        "nomn", "gent", "datv", "accs", "acc2", "gen1",
        "gen2", "ablt", "loct", "voct", "loc1", "loc2",
    ]

    # G and d are names defined outside this method.
    self.grammar = G
    self.dict = d
    self.morph = pymorphy2.MorphAnalyzer()
    # use pymorphy2 to define set of grammemes
示例5: __init__
# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self):
    """Create the tokenizer, the POS tagger and the morphological analyzer."""
    # Each helper is built with its default arguments, in this order.
    self.tokenizer = Tokenizer()
    self.tagger = POSTagger()
    self.morph = MorphAnalyzer()
示例6: __init__
# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, raw=None):
    """Wrap an analyzer, creating a ``PymorphyAnalyzer`` when none is given.

    :param raw: analyzer to wrap; any falsy value triggers the default.
    """
    self.raw = raw or PymorphyAnalyzer()
示例7: get_aspect_matrix
# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def get_aspect_matrix(self, n_clusters=0):
    """Build the row-normalized aspect matrix.

    K-means is fitted on ``self.emb_matrix`` and its cluster centers form the
    first rows. If ``seed_words.txt`` exists in ``self.data_path``, each of
    its lines contributes one more row: the mean embedding of the line's
    lemmatized words that are present in ``self.embeddings``, or a random
    vector of size ``self.emb_dim`` when no word of the line is known.

    Side effect: ``self.aspect_size`` is set to ``n_clusters`` plus the
    number of seed rows that were added.

    :param n_clusters: number of k-means clusters, passed to ``KMeans``.
        NOTE(review): the default of 0 is handed to ``KMeans`` unchanged --
        confirm that callers always pass a positive value.
    :return: matrix of aspect vectors, each row divided by its L2 norm.
    """
    seed_words_path = os.path.join(self.data_path, "seed_words.txt")
    self.aspect_size = n_clusters

    km = KMeans(n_clusters=n_clusters)
    km.fit(self.emb_matrix)
    km_aspects = km.cluster_centers_

    seed_aspects = []
    if os.path.exists(seed_words_path):
        morph = pymorphy2.MorphAnalyzer()
        # Seed words are natural-language text; read them as UTF-8 instead of
        # relying on the platform's locale encoding.
        with open(seed_words_path, encoding="utf-8") as f:
            for line in f:
                one_aspect = []
                # Raw string: '\W' in a plain literal is an invalid escape.
                for word in re.split(r'\W+', line.lower()):
                    if not word:
                        # re.split yields empty strings at the line edges.
                        continue
                    w = morph.parse(word)[0].normal_form
                    if w in self.embeddings:
                        one_aspect.append(self.embeddings[w])
                if one_aspect:
                    one_aspect = np.mean(one_aspect, axis=0)
                else:
                    print("Not initialized:\t" + line)
                    one_aspect = np.random.random((self.emb_dim,))
                seed_aspects.append(one_aspect)

    if seed_aspects:
        self.aspect_size += len(seed_aspects)
        aspects = np.concatenate((km_aspects, np.stack(seed_aspects)), axis=0)
    else:
        # No seed file, or an empty one: np.stack([]) would raise ValueError.
        aspects = km_aspects

    # L2 normalization
    norm_aspect_matrix = aspects / np.linalg.norm(aspects, axis=-1, keepdims=True)
    return norm_aspect_matrix
示例8: __init__
# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self):
    """Create the pymorphy2 analyzer used by this object."""
    analyzer = pymorphy2.MorphAnalyzer()
    self.morph = analyzer
示例9: __init__
# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self,
             language="ru",
             eval_model_config_path: str = None,
             eval_model_weights_path: str = None,
             gram_dict_input: str = None,
             gram_dict_output: str = None,
             word_vocabulary: str = None,
             char_set_path: str = None,
             build_config: str = None):
    """Resolve the model file paths, then load the evaluation model.

    Every path argument left as ``None`` is replaced by the matching entry of
    ``MODELS_PATHS[language]``.

    :param language: language code; ``"ru"`` enables the pymorphy2 analyzer
        and the tag converter, ``"en"`` triggers the NLTK resource downloads.
    :param eval_model_config_path: default key ``"eval_model_config"``.
    :param eval_model_weights_path: default key ``"eval_model_weights"``.
    :param gram_dict_input: default key ``"gram_input"``.
    :param gram_dict_output: default key ``"gram_output"``.
    :param word_vocabulary: default key ``"word_vocabulary"``.
    :param char_set_path: default key ``"char_set"``.
    :param build_config: default key ``"build_config"``.
    """
    def _or_default(value, key):
        # MODELS_PATHS is only consulted when the caller passed None.
        return MODELS_PATHS[language][key] if value is None else value

    eval_model_config_path = _or_default(eval_model_config_path, "eval_model_config")
    eval_model_weights_path = _or_default(eval_model_weights_path, "eval_model_weights")
    gram_dict_input = _or_default(gram_dict_input, "gram_input")
    gram_dict_output = _or_default(gram_dict_output, "gram_output")
    word_vocabulary = _or_default(word_vocabulary, "word_vocabulary")
    char_set_path = _or_default(char_set_path, "char_set")
    build_config = _or_default(build_config, "build_config")

    self.language = language

    # Both helpers stay None for every language other than Russian.
    is_russian = language == "ru"
    self.converter = converters.converter('opencorpora-int', 'ud14') if is_russian else None
    self.morph = MorphAnalyzer() if is_russian else None

    if self.language == "en":
        for resource in ("wordnet", "averaged_perceptron_tagger", "universal_tagset"):
            nltk.download(resource)

    self.build_config = BuildModelConfig()
    self.build_config.load(build_config)

    self.model = LSTMMorphoAnalysis(language=language)
    self.model.prepare(gram_dict_input, gram_dict_output, word_vocabulary, char_set_path)
    self.model.load_eval(self.build_config, eval_model_config_path, eval_model_weights_path)
示例10: __init__
# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, language: str):
    """Create an empty model holder for ``language``.

    :param language: language code; the pymorphy2 analyzer and the tag
        converter are only created when it equals ``"ru"``.
    """
    self.language = language  # type: str

    # Both helpers stay None for every language other than Russian.
    is_russian = language == "ru"
    self.morph = MorphAnalyzer() if is_russian else None  # type: MorphAnalyzer
    self.converter = converters.converter('opencorpora-int', 'ud14') if is_russian else None

    # Fresh, empty vocabularies.
    self.grammeme_vectorizer_input = GrammemeVectorizer()  # type: GrammemeVectorizer
    self.grammeme_vectorizer_output = GrammemeVectorizer()  # type: GrammemeVectorizer
    self.word_vocabulary = WordVocabulary()  # type: WordVocabulary
    self.char_set = ""  # type: str

    # No model is attached yet.
    self.train_model = None  # type: Model
    self.eval_model = None  # type: Model
示例11: __init__
# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, language: str):
    """Create empty vocabularies and, for Russian, the morphology helpers.

    :param language: language code; the pymorphy2 analyzer and the tag
        converter are only created when it equals ``"ru"``.
    """
    self.language = language

    # Fresh, empty vocabularies.
    self.grammeme_vectorizer_input = GrammemeVectorizer()  # type: GrammemeVectorizer
    self.grammeme_vectorizer_output = GrammemeVectorizer()  # type: GrammemeVectorizer
    self.word_vocabulary = WordVocabulary()  # type: WordVocabulary
    self.char_set = set()

    # Both helpers stay None for every language other than Russian.
    is_russian = self.language == "ru"
    self.morph = MorphAnalyzer() if is_russian else None  # type: MorphAnalyzer
    self.converter = converters.converter('opencorpora-int', 'ud14') if is_russian else None
示例12: __init__
# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, stopwords: Optional[List[str]] = None, ngram_range: List[int] = None,
             lemmas: bool = False, lowercase: Optional[bool] = None,
             alphas_only: Optional[bool] = None, **kwargs):
    """Store the tokenization options and create the tokenizer and lemmatizer.

    :param stopwords: stored on the instance; a falsy value becomes ``[]``.
    :param ngram_range: stored as a tuple; ``None`` means ``[1, 1]``.
    :param lemmas: stored as-is on the instance.
    :param lowercase: stored as-is on the instance.
    :param alphas_only: stored as-is on the instance.
    :param kwargs: accepted and ignored.
    """
    self.stopwords = stopwords or []
    self.tokenizer = ToktokTokenizer()
    self.lemmatizer = pymorphy2.MorphAnalyzer()

    # cast JSON array to tuple
    self.ngram_range = tuple([1, 1] if ngram_range is None else ngram_range)

    self.lemmas = lemmas
    self.lowercase = lowercase
    self.alphas_only = alphas_only

    # Starts empty; nothing in this method fills it.
    self.tok2morph = {}
示例13: __init__
# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, save_path: str, load_path: str, max_pymorphy_variants: int = -1, **kwargs) -> None:
    """Initialise the parent, call ``load()``, then set up caches and helpers.

    :param save_path: forwarded to the parent constructor.
    :param load_path: forwarded to the parent constructor.
    :param max_pymorphy_variants: stored on the instance before ``load()``
        is called.
    :param kwargs: forwarded to the parent constructor.
    """
    super().__init__(save_path, load_path, **kwargs)
    self.max_pymorphy_variants = max_pymorphy_variants
    self.load()

    # Statement order is kept: the caches and helpers are created after load().
    self.memorized_word_indexes = {}
    self.memorized_tag_indexes = {}
    self.analyzer = MorphAnalyzer()
    self.converter = converters.converter('opencorpora-int', 'ud20')
示例14: __init__
# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, *args, **kwargs):
    """Create the pymorphy2 lemmatizer; all arguments are accepted and ignored."""
    lemmatizer = pymorphy2.MorphAnalyzer()
    self.lemmatizer = lemmatizer
示例15: __init__
# 需要导入模块: import pymorphy2 [as 别名]
# 或者: from pymorphy2 import MorphAnalyzer [as 别名]
def __init__(self, save_path: Optional[str] = None, load_path: Optional[str] = None,
             transform_lemmas=False, **kwargs) -> None:
    """Set up local state and helpers, then initialise the parent class.

    :param save_path: forwarded to the parent constructor.
    :param load_path: forwarded to the parent constructor.
    :param transform_lemmas: stored on the instance before ``_reset()`` runs.
    :param kwargs: forwarded to the parent constructor.
    """
    self.transform_lemmas = transform_lemmas
    self._reset()

    # Statement order is kept: the helpers exist before the parent constructor runs.
    self.analyzer = MorphAnalyzer()
    self.converter = converters.converter("opencorpora-int", "ud20")

    super().__init__(save_path, load_path, **kwargs)