当前位置: 首页>>代码示例>>Python>>正文


Python Util.get_words方法代码示例

本文整理汇总了Python中lib.util.Util.get_words方法的典型用法代码示例。如果您正苦于以下问题：Python Util.get_words方法的具体用法？Python Util.get_words怎么用？Python Util.get_words使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类lib.util.Util的用法示例。


在下文中一共展示了Util.get_words方法的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: extract_local_words_batch

# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_words [as 别名]
    def extract_local_words_batch(self, params):
        """Extract local words from the tweets of every labeled user.

        Each user yielded by ``self.users.iter()`` whose ``'location_point'``
        is set gets a city label predicted by ``self.model``. Every distinct
        word found in that user's tweets is then counted once for that city.
        Words whose total count reaches ``params['cmin']`` are scored with
        ``self.calc_divergence`` and kept when ``self.check_divergence``
        accepts the score.

        Args:
            params: Settings dict. ``'cmin'`` (minimum total count for a word
                to be scored) is read here; the whole dict is also forwarded
                to ``calc_divergence`` and ``check_divergence``.

        Returns:
            A ``Words`` object built from a dict that maps each kept word to
            ``{'word': word, 'd': divergence, 'distribution': city_counts}``.
        """
        # word -> {city label: number of labeled users who used the word}
        word_counts = {}

        for user in self.users.iter():
            location = user['location_point']
            if location is None:
                # Users without a location cannot contribute a city label.
                continue
            city = str(self.model.predict([location])[0])

            # Collect words into a set so each word counts once per user,
            # no matter how many of the user's tweets contain it.
            user_words = set()
            for tweet in self.tweets.get(user['id']):
                user_words.update(Util.get_words(tweet['text']))

            for w in user_words:
                city_counts = word_counts.setdefault(w, {})
                city_counts[city] = city_counts.get(city, 0) + 1

        # Score every sufficiently frequent word and keep the accepted ones.
        lwords = {}
        for w, city_counts in word_counts.items():
            total = float(sum(city_counts.values()))
            if total >= params['cmin']:
                d = self.calc_divergence(total, city_counts, params)
                # NOTE(review): assumes check_divergence returns a bool.
                if self.check_divergence(d, params):
                    lwords[w] = {'word': w, 'd': d, 'distribution': city_counts}  # save as dict
        return Words(lwords)
开发者ID:yamaguchiyuto,项目名称:location_inference,代码行数:30,代码来源:olimg.py

示例2: extract_local_words

# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_words [as 别名]
    def extract_local_words(self, tweets, params):
        """Extract local words from a collection of tweets.

        First, for every word, the set of ids of users who tweeted it is
        collected. Then, for each of those users that ``self.users.get``
        knows and whose ``'location_point'`` is set, the city label predicted
        by ``self.model`` is counted once for the word. Words whose total
        count reaches ``params['cmin']`` are scored with
        ``self.calc_divergence`` and kept when ``self.check_divergence``
        accepts the score.

        Args:
            tweets: Iterable of tweet dicts; ``'text'`` and ``'user_id'`` are
                read from each.
            params: Settings dict. ``'cmin'`` (minimum total count for a word
                to be scored) is read here; the whole dict is also forwarded
                to ``calc_divergence`` and ``check_divergence``.

        Returns:
            A ``Words`` object built from a dict that maps each kept word to
            ``{'word': word, 'd': divergence, 'distribution': city_counts}``.
        """
        # Making user sets: word -> ids of the users who tweeted it.
        user_sets = {}
        for tweet in tweets:
            for w in Util.get_words(tweet['text']):
                user_sets.setdefault(w, set()).add(tweet['user_id'])

        # Making word distributions: word -> {city label: labeled-user count}.
        word_counts = {}
        for w, user_ids in user_sets.items():
            for user_id in user_ids:
                user = self.users.get(user_id)
                if user is None:
                    continue
                location = user['location_point']
                if location is None:
                    # Only labeled users contribute to the distribution.
                    continue
                city = str(self.model.predict([location])[0])
                city_counts = word_counts.setdefault(w, {})
                city_counts[city] = city_counts.get(city, 0) + 1

        # Calculating divergences and keeping the accepted words.
        lwords = {}
        for w, city_counts in word_counts.items():
            total = float(sum(city_counts.values()))
            if total >= params['cmin']:
                d = self.calc_divergence(total, city_counts, params)
                # NOTE(review): assumes check_divergence returns a bool.
                if self.check_divergence(d, params):
                    lwords[w] = {'word': w, 'd': d, 'distribution': city_counts}  # save as dict
        return Words(lwords)
开发者ID:yamaguchiyuto,项目名称:location_inference,代码行数:35,代码来源:olimg.py

示例3: update_user_distributions

# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_words [as 别名]
 def update_user_distributions(self, tweets, tlwords, params):
     """Update the distributions of unlabeled users from the given tweets.

     For each tweet, the author and the users returned by
     ``self.get_neighbors`` are considered. Every such user that
     ``self.users.get`` knows and whose ``'location_point'`` is ``None``
     has an entry in ``self.user_distributions`` (created with
     ``self.init_user_distribution`` when missing). That entry is combined,
     via ``self.add_distribution``, with the ``'distribution'`` of each word
     of the tweet that is:

     * contained in ``tlwords`` (temporally-local words), and/or
     * contained in ``self.lwords`` with a qualifying score ``'d'``: at
       least ``params['dmin']`` when ``params['divergence']`` is ``'l2'``
       or ``'kl'``, at most ``params['dmin']`` otherwise.

     Args:
         tweets: Iterable of tweet dicts; ``'text'`` and ``'user_id'`` are
             read from each.
         tlwords: Word collection exposing ``contain(word)`` and
             ``get(word)``.
         params: Settings dict; ``'divergence'`` and ``'dmin'`` are read
             here, and the dict is forwarded to ``get_neighbors``.

     Returns:
         None. ``self.user_distributions`` is updated in place.
     """
     for tweet in tweets:
         author_id = tweet['user_id']
         neighbors = self.get_neighbors(author_id, params)
         # The words depend only on the tweet, so tokenize lazily and at
         # most once per tweet instead of once per affected user.
         words = None
         for user_id in neighbors | set([author_id]):
             user = self.users.get(user_id)
             if user is None or user['location_point'] is not None:
                 # Only known, unlabeled users are updated.
                 continue

             uid = user['id']
             if uid not in self.user_distributions:
                 self.user_distributions[uid] = self.init_user_distribution()

             if words is None:
                 # Materialize so the result can be reused across users
                 # even if get_words returns a one-shot iterator.
                 words = list(Util.get_words(tweet['text']))

             for w in words:
                 if tlwords.contain(w):
                     # Update using temporally-local word.
                     tlword = tlwords.get(w)
                     self.user_distributions[uid] = self.add_distribution(
                         self.user_distributions[uid], tlword['distribution'])
                 if self.lwords.contain(w):
                     # Update using local word, subject to the threshold.
                     lword = self.lwords.get(w)
                     if params['divergence'] in ['l2', 'kl']:
                         qualifies = lword['d'] >= params['dmin']
                     else:
                         qualifies = lword['d'] <= params['dmin']
                     if qualifies:
                         self.user_distributions[uid] = self.add_distribution(
                             self.user_distributions[uid], lword['distribution'])
开发者ID:yamaguchiyuto,项目名称:location_inference,代码行数:28,代码来源:olimg.py


注:本文中的lib.util.Util.get_words方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。