

Python Manager.extend Method Code Examples

This article collects typical usage examples of the Manager.extend method from Python's multiprocessing module. If you have been wondering what Manager.extend does, how to use it, or what it looks like in real code, the curated example below may help. You can also explore other usage examples of multiprocessing.Manager.


One code example of the Manager.extend method is shown below; examples are ordered by popularity by default.
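Before the full example, here is a minimal, self-contained sketch (not taken from the project below; the collect helper and the sample data are made up for illustration) of the pattern it relies on: a Manager().list() proxy is handed to pool workers, and each worker's extend call is forwarded to the manager process, so the parent sees every worker's results.

from multiprocessing import Manager, Pool

def collect(args):
    shared_list, chunk = args
    # extend() on the list proxy is forwarded to the manager process that owns
    # the real list; the argument must be picklable, hence a plain list
    shared_list.extend([token.upper() for token in chunk])

if __name__ == '__main__':
    manager = Manager()
    shared_list = manager.list()          # shared list proxy
    chunks = [['spam', 'eggs'], ['ham'], ['foo', 'bar']]
    with Pool(processes=2) as pool:
        pool.map(collect, [(shared_list, chunk) for chunk in chunks])
    print(list(shared_list))              # all chunks collected; chunk order may vary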

Example 1: SenseEmbedding

# Required import: from multiprocessing import Manager [as alias]
# Or: from multiprocessing.Manager import extend [as alias]
# Standard-library and third-party imports used by this snippet; the project-specific
# WordEmbedding, tools and sense_tokenize modules come from the ml_algorithms project.
from collections import defaultdict
from itertools import chain
from multiprocessing import Manager
import logging

import numpy as np
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# logger is assumed to be a module-level logger in the original source
logger = logging.getLogger(__name__)

class SenseEmbedding(WordEmbedding.WordModel):
    """
        Implementation of Sense2Vec;  NP, VP and POS tag based embedding
        reference : http://arxiv.org/pdf/1511.06388v1.pdf
        """

    # DO NOT change this ordering, need to figure out a better way to achieve this
    senses = ['NOUN', 'VERB', 'ADJECTIVE', 'CONJUNCTION', 'CARDINAL', 'DEFAULT']

    def __init__(self, data_sources, workers, *args, **kwargs):
        """
        Sense2vec embedding
        :param data_sources: list of data sources to pull data from
        :param workers: number of processes to create in the pool
        """
        WordEmbedding.WordModel.__init__(self, *args, **kwargs)
        self.sources = data_sources
        self.annotator = tools.Annotator()
        self.workers = workers
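        # list proxy created by a Manager: worker processes can call extend()
        # on it and the results remain visible to the parent process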
        self.tokenized_blocks = Manager().list()
        self.stemmer = PorterStemmer()
        self.stop_words = set(stopwords.words('english'))
        self.word_to_tag = defaultdict(list)

    def form_tag_tokens(self):
        for word_tag in self.model.vocab:
            word, tag = word_tag.split("|")
            self.word_to_tag[word].append(tag)

    def get_tags_for_word(self, word):
        token_tags = self.word_to_tag.get(word, None)
        if not token_tags: return []
        return [word + "|" + tag for tag in token_tags]

    def tokenize(self, text_block):
        sense_phrases = sense_tokenize(text_block, self.annotator, self.stemmer, self.stop_words)
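        # extend() is forwarded to the manager process that owns the list, so
        # phrases tokenized in a worker process are not lost when it exits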
        self.tokenized_blocks.extend(sense_phrases)

    def get_sense_vec(self, entity, dimension, sense='NOUN'):
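        # look up the exact "entity|SENSE" key first, fall back to the phrase
        # tag (NP/VP), and finally average per-token vectors of the entity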

        if sense == 'NOUN':
            if entity + '|NOUN' in self.model.vocab:
                return self.model[entity + '|NOUN']

            elif entity + '|NP' in self.model.vocab:
                return self.model[entity + '|NP']

            else:
                entities = entity.split(" ")
                entity_vec = [self.model[e + '|NOUN'] for e in entities if e + '|NOUN'
                              in self.model.vocab]
                entity_vec.extend([self.get_vector(e, dimension, 'NOUN') for e in entities
                                   if e + '|NOUN' not in self.model.vocab])
                return np.average(entity_vec, axis=0)

        else:
            if entity + '|VERB' in self.model.vocab:
                return self.model[entity + '|VERB']

            elif entity + '|VP' in self.model.vocab:
                return self.model[entity + '|VP']

            else:
                entities = entity.split(" ")
                entity_vec = [self.model[e + '|VERB'] for e in entities if e + '|VERB'
                              in self.model.vocab]
                entity_vec.extend([self.get_vector(e, dimension, 'VERB') for e in entities
                                   if e + '|VERB' not in self.model.vocab])
                return np.average(entity_vec, axis=0)

    def get_vector(self, word, dimension, sense_except='NOUN'):
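        # try the remaining senses in the fixed order defined above and return
        # the first match; fall back to a random vector if none is found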

        words = [word] * (len(SenseEmbedding.senses) - 1)
        senses = list(SenseEmbedding.senses)
        senses.remove(sense_except)
        word_with_sense = [w + '|' + s for w, s in zip(words, senses)]
        for candidate in word_with_sense:
            if candidate in self.model.vocab:
                return self.model[candidate]

        return np.random.normal(0, 1, dimension)

    def form_model(self):
        text_blocks = []
        for source in self.sources:
            source.start()

        logger.info("Reading the text blocks from the source")
        for item_tuple in chain(*self.sources):
            if not item_tuple:
                logger.warning("item read from source is empty")
                continue

            item = " ".join([t[1] for t in item_tuple])
            if item == '': continue
            text_blocks.append(item)

        logger.info("Read all the text blocks")
        logger.info("Number of text blocks read : %d" % len(text_blocks))
        logger.info("will sentence and word tokenize the text blocks")
#......... the rest of the code is omitted here .........
Developer: subhadeepmaji, Project: ml_algorithms, Lines of code: 103, Source file: SenseEmbedding.py


Note: The multiprocessing.Manager.extend example in this article was compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, and the copyright of the source code remains with them. Please follow the corresponding project's license when distributing or using the code, and do not reproduce this article without permission.