本文整理汇总了Python中nltk.corpus.wordnet.synset_from_pos_and_offset方法的典型用法代码示例。如果您正苦于以下问题:Python wordnet.synset_from_pos_and_offset方法的具体用法?Python wordnet.synset_from_pos_and_offset怎么用?Python wordnet.synset_from_pos_and_offset使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nltk.corpus.wordnet的用法示例。
在下文中一共展示了wordnet.synset_from_pos_and_offset方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: fit
# 需要导入模块: from nltk.corpus import wordnet [as 别名]
# 或者: from nltk.corpus.wordnet import synset_from_pos_and_offset [as 别名]
def fit(self, X):
    """Fit the transformer.

    After delegating to the parent ``fit`` and unpacking ``X``, every
    ``(offset, pos)`` pair found in the unpacked items is resolved to a
    WordNet synset, and whatever ``self.recursive_related`` returns for
    that synset is tallied. When ``self.prune`` is truthy, only entries
    tallied more than once are kept.

    Sets ``self.feature_names``, ``self.features`` (entry -> column index)
    and ``self.vec_len``.

    Returns
    -------
    self
    """
    super().fit(X)
    X = self._unpack(X)

    tally = Counter()
    for offset, pos in chain.from_iterable(X):
        synset = wn.synset_from_pos_and_offset(pos, int(offset))
        tally.update(self.recursive_related(synset))

    if self.prune:
        # Keep only entries that were counted at least twice.
        tally = {key: count for key, count in tally.items() if count > 1}

    self.feature_names = set(chain.from_iterable(X))
    self.features = {key: column for column, key in enumerate(tally)}
    self.vec_len = len(self.features)
    return self
示例2: procecss_wn18_definitions
# 需要导入模块: from nltk.corpus import wordnet [as 别名]
# 或者: from nltk.corpus.wordnet import synset_from_pos_and_offset [as 别名]
def procecss_wn18_definitions(self, wn18_path=""):
    """Build definition lookup tables from the WN18 definitions file.

    Reads ``wordnet-mlj12-definitions.txt`` inside ``wn18_path``. Every
    line must split on tabs into exactly three fields: a synset offset, a
    synset tag, and a third field that is ignored. The definition text is
    fetched from WordNet, using the offset and the part-of-speech code
    taken from the tag, rather than from the file itself.

    Populates:
      * ``self.definition_map``  -- stripped offset string -> definition
      * ``self.synset_to_idx``   -- offset string -> position in sorted order
      * ``self.idx_to_synset``   -- the inverse of ``synset_to_idx``
      * ``self.definitions``     -- definitions listed in that sorted order

    Also prints how many lines produced an empty definition.
    """
    tag_to_wn_pos = {'NN': 'n', 'VB': 'v', 'JJ': 'a', 'RB': 'r'}

    path = os.path.join(wn18_path, "wordnet-mlj12-definitions.txt")
    with open(path) as handle:
        rows = list(handle)

    self.definition_map = {}
    empty_count = 0
    for row in tqdm(rows):
        offset_field, tag_field, _ = row.split('\t')
        # The POS code is the second-to-last '_'-separated piece of the tag.
        pos_code = tag_field.split('_')[-2]
        synset = wn.synset_from_pos_and_offset(
            tag_to_wn_pos[pos_code], int(offset_field)
        )
        text = synset.definition().strip()
        if not text:
            empty_count += 1
        self.definition_map[offset_field.strip()] = text

    print("#Empty definitions {}/{}".format(empty_count, len(self.definition_map)))

    ordered = sorted(self.definition_map)
    self.synset_to_idx = {name: position for position, name in enumerate(ordered)}
    self.idx_to_synset = dict(enumerate(ordered))
    self.definitions = [self.definition_map[name] for name in ordered]
示例3: senti_synset
# 需要导入模块: from nltk.corpus import wordnet [as 别名]
# 或者: from nltk.corpus.wordnet import synset_from_pos_and_offset [as 别名]
def senti_synset(self, *vals):
    """Return the ``SentiSynset`` selected by ``vals``, or ``None``.

    If ``tuple(vals)`` is itself a key of ``self._db`` it is treated as a
    ``(pos, offset)`` pair. Otherwise ``vals[0]`` is passed to
    ``wn.synset`` and the resulting synset's ``(pos, offset)`` is looked
    up instead. In both paths a pos of ``'s'`` is replaced by ``'a'``
    before it is used for the WordNet / database lookup.

    Returns ``None`` only when the second lookup finds no entry.
    """
    from nltk.corpus import wordnet as wn

    key = tuple(vals)
    if key in self._db:
        pos_score, neg_score = self._db[key]
        pos, offset = vals
        wn_pos = 'a' if pos == 's' else pos
        synset = wn.synset_from_pos_and_offset(wn_pos, offset)
        return SentiSynset(pos_score, neg_score, synset)

    # Not a direct (pos, offset) key: resolve the first argument via WordNet.
    synset = wn.synset(vals[0])
    pos = synset.pos()
    if pos == 's':
        pos = 'a'
    key = (pos, synset.offset())
    if key not in self._db:
        return None

    pos_score, neg_score = self._db[key]
    return SentiSynset(pos_score, neg_score, synset)
示例4: id2ss
# 需要导入模块: from nltk.corpus import wordnet [as 别名]
# 或者: from nltk.corpus.wordnet import synset_from_pos_and_offset [as 别名]
def id2ss(self, ID):
    """Look up the WordNet synset that an id string refers to.

    The first eight characters of ``ID`` are parsed as the integer synset
    offset and its last character is used as the part-of-speech code.

    Parameters
    ----------
    ID : str
        the id of a chinese word found in zh_wordnet.

    Returns
    ----------
    nltk.corpus.reader.wordnet.Synset
        an object in en_wordnet.
    """
    pos = str(ID[-1:])
    offset = int(ID[:8])
    return wn.synset_from_pos_and_offset(pos, offset)
示例5: vectorize
# 需要导入模块: from nltk.corpus import wordnet [as 别名]
# 或者: from nltk.corpus.wordnet import synset_from_pos_and_offset [as 别名]
def vectorize(self, x):
    """Vectorize a word.

    Starts from a zero vector of length ``self.vec_len``. For each
    ``(offset, pos)`` pair in ``x`` the matching WordNet synset is passed
    to ``self.recursive_related``, and the vector position of every
    returned item (per ``self.features``) is set to 1. When ``self.prune``
    is truthy, items missing from ``self.features`` are skipped first.
    """
    vec = np.zeros(self.vec_len)
    for offset, pos in x:
        synset = wn.synset_from_pos_and_offset(pos, int(offset))
        related = self.recursive_related(synset)
        if self.prune:
            related = [item for item in related if item in self.features]
        columns = [self.features[item] for item in related]
        vec[columns] = 1
    return vec
示例6: getnode
# 需要导入模块: from nltk.corpus import wordnet [as 别名]
# 或者: from nltk.corpus.wordnet import synset_from_pos_and_offset [as 别名]
def getnode(x):
    """Return the WordNet synset with pos ``'n'`` identified by ``x``.

    The first character of ``x`` is dropped and the remainder is parsed as
    the integer synset offset.
    """
    offset = int(x[1:])
    return wn.synset_from_pos_and_offset('n', offset)
示例7: all_senti_synsets
# 需要导入模块: from nltk.corpus import wordnet [as 别名]
# 或者: from nltk.corpus.wordnet import synset_from_pos_and_offset [as 别名]
def all_senti_synsets(self):
    """Yield a ``SentiSynset`` for every entry of ``self._db``.

    Each key is unpacked as ``(pos, offset)`` and each value as
    ``(pos_score, neg_score)``; the synset is resolved through WordNet
    from the key.
    """
    from nltk.corpus import wordnet as wn

    for (pos, offset), (pos_score, neg_score) in self._db.items():
        yield SentiSynset(
            pos_score,
            neg_score,
            wn.synset_from_pos_and_offset(pos, offset),
        )