本文整理汇总了Python中multiprocessing.Manager.extend方法的典型用法代码示例。如果您正苦于以下问题:Python Manager.extend方法的具体用法?Python Manager.extend怎么用?Python Manager.extend使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类multiprocessing.Manager
的用法示例。
在下文中一共展示了Manager.extend方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: SenseEmbedding
# Required import: from multiprocessing import Manager
# Note: "extend" is a method of the ListProxy returned by Manager().list(),
# not of the Manager class itself ("from multiprocessing.Manager import extend" is invalid).
class SenseEmbedding(WordEmbedding.WordModel):
"""
Implementation of Sense2Vec; NP, VP and POS tag based embedding
reference : http://arxiv.org/pdf/1511.06388v1.pdf
"""
# DO NOT change this ordering, need to figure out a better way to achieve this
senses = ['NOUN', 'VERB', 'ADJECTIVE', 'CONJUNCTION', 'CARDINAL', 'DEFAULT']
def __init__(self, data_sources, workers, *args, **kwargs):
    """Sense2vec embedding model.

    :param data_sources: iterable of sources the model pulls raw text from
    :param workers: number of processes to create in the worker pool
    """
    WordEmbedding.WordModel.__init__(self, *args, **kwargs)
    # NLP helpers used during tokenization
    self.annotator = tools.Annotator()
    self.stemmer = PorterStemmer()
    self.stop_words = set(stopwords.words('english'))
    # data plumbing
    self.sources = data_sources
    self.workers = workers
    # shared across processes so workers can append tokenized output
    self.tokenized_blocks = Manager().list()
    # maps a bare word to every POS tag it appears with in the vocab
    self.word_to_tag = defaultdict(list)
def form_tag_tokens(self):
    """Index the trained model's vocabulary by bare word.

    Vocabulary entries are stored as "word|TAG"; every tag observed
    for a word is appended to ``self.word_to_tag[word]``.
    """
    for entry in self.model.vocab:
        token, pos_tag = entry.split("|")
        self.word_to_tag[token].append(pos_tag)
def get_tags_for_word(self, word):
    """Return every "word|TAG" vocabulary key known for *word*.

    Yields an empty list when the word was never seen with any tag.
    """
    tags = self.word_to_tag.get(word)
    if tags:
        return ["%s|%s" % (word, tag) for tag in tags]
    return []
def tokenize(self, text_block):
    """Sense-tokenize one text block and publish the resulting phrases.

    The phrases are appended to the shared ``tokenized_blocks`` list so
    the parent process can collect output from all workers.
    """
    phrases = sense_tokenize(text_block, self.annotator, self.stemmer, self.stop_words)
    self.tokenized_blocks.extend(phrases)
def get_sense_vec(self, entity, dimension, sense='NOUN'):
    """Return an embedding vector for *entity* under the given sense.

    Lookup order:
      1. exact "entity|NOUN" (or "entity|VERB") vocabulary key
      2. exact "entity|NP" (or "entity|VP") phrase key
      3. average of per-word vectors, falling back to :meth:`get_vector`
         for words missing from the vocabulary

    :param entity: word or space-separated phrase
    :param dimension: embedding size used for random fallback vectors
    :param sense: 'NOUN' selects noun/NP lookup; any other value selects
                  verb/VP lookup (matches the original if/else split)
    """
    # FIX: replaced Python-2-only dict.has_key() with the `in` operator
    # (has_key was removed in Python 3; the comprehensions below already
    # used `in`, so the block was internally inconsistent).
    # The NOUN and VERB branches were duplicated line-for-line; the shared
    # logic now lives in _lookup_sense_vec.
    if sense == 'NOUN':
        return self._lookup_sense_vec(entity, dimension, 'NOUN', 'NP')
    return self._lookup_sense_vec(entity, dimension, 'VERB', 'VP')

def _lookup_sense_vec(self, entity, dimension, pos_tag, phrase_tag):
    """Shared word/phrase lookup used by get_sense_vec for both senses."""
    vocab = self.model.vocab
    if entity + '|' + pos_tag in vocab:
        return self.model[entity + '|' + pos_tag]
    if entity + '|' + phrase_tag in vocab:
        return self.model[entity + '|' + phrase_tag]
    # Multi-word entity: average the per-word vectors, sampling a
    # fallback vector (via get_vector) for out-of-vocabulary words.
    entities = entity.split(" ")
    entity_vec = [self.model[e + '|' + pos_tag] for e in entities
                  if e + '|' + pos_tag in vocab]
    entity_vec.extend([self.get_vector(e, dimension, pos_tag) for e in entities
                       if e + '|' + pos_tag not in vocab])
    return np.average(entity_vec, axis=0)
def get_vector(self, word, dimension, sense_except='NOUN'):
    """Return a vector for *word* under any sense other than *sense_except*.

    Each remaining sense is tried in the priority order declared in
    ``SenseEmbedding.senses``; if no "word|SENSE" key exists in the
    vocabulary, a random N(0, 1) vector of length *dimension* is drawn.

    :param word: bare word, without a "|SENSE" suffix
    :param dimension: length of the random fallback vector
    :param sense_except: sense to exclude from the search (an unknown
                         value now simply excludes nothing instead of
                         raising ValueError from list.remove)
    """
    # FIX: replaced Python-2-only dict.has_key() with the `in` operator,
    # and dropped the [word]*(n-1) + zip construction, which also
    # shadowed the `word` parameter with the loop variable.
    for sense in SenseEmbedding.senses:
        if sense == sense_except:
            continue
        key = word + '|' + sense
        if key in self.model.vocab:
            return self.model[key]
    return np.random.normal(0, 1, dimension)
def form_model(self):
text_blocks = []
for source in self.sources:
source.start()
logger.info("Reading the text blocks from the source")
for item_tuple in chain(*self.sources):
if not item_tuple:
logger.warn("item read from source is empty")
continue
item = " ".join([t[1] for t in item_tuple])
if item == '': continue
text_blocks.append(item)
logger.info("Read all the text blocks")
logger.info("Number of text blocks read : %d" % len(text_blocks))
logger.info("will sentence and word tokenize the text blocks")
#.........这里部分代码省略.........