当前位置: 首页>>代码示例>>Python>>正文


Python Util.get_nouns方法代码示例

本文整理汇总了Python中lib.util.Util.get_nouns方法的典型用法代码示例。如果您正苦于以下问题:Python Util.get_nouns方法的具体用法是什么?Python Util.get_nouns怎么用?有哪些Python Util.get_nouns的使用例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类lib.util.Util的用法示例。


在下文中一共展示了Util.get_nouns方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: extract_local_words_batch

# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
    def extract_local_words_batch(self, params):
        """Select words whose per-city user counts pass the dispersion filter.

        Every user with a non-None ``location_point`` contributes at most one
        count per word: the nouns of all of the user's tweets are merged into
        a set first. Counts are keyed by ``str(tuple(location))``.

        Args:
            params: mapping read for ``'lang'`` (forwarded to
                ``Util.get_nouns``), ``'cmin'`` (minimum total count a word
                needs to be considered) and ``'dmax'`` (exclusive upper bound
                on the value returned by ``self.calc_dispersion``).

        Returns:
            ``Words`` built from ``{word: {'word', 'd', 'distribution'}}`` for
            the words that pass both thresholds.
        """
        word_counts = {}

        for user in self.users.iter():
            location = user['location_point']
            if location is None:
                # Users without a known location carry no city signal.
                continue
            city = str(tuple(location))

            user_words = set()
            for tweet in self.tweets.get(user['id']):
                user_words.update(Util.get_nouns(tweet['text'], params['lang']))

            for w in user_words:
                per_city = word_counts.setdefault(w, {})
                per_city[city] = per_city.get(city, 0) + 1

        # Keep only words that are frequent enough and whose dispersion is
        # strictly below the configured maximum.
        lwords = {}
        for w, distribution in word_counts.items():
            total = float(sum(distribution.values()))
            if total < params['cmin']:
                continue
            d = self.calc_dispersion(distribution, params)
            if d < params['dmax']:
                lwords[w] = {'word': w, 'd': d, 'distribution': distribution}  # save as dict
        return Words(lwords)
开发者ID:yamaguchiyuto,项目名称:location_inference,代码行数:30,代码来源:cheng.py

示例2: learn

# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
    def learn(self, params):
        """Build term-frequency statistics from users with a known location.

        Each located user contributes at most one count per word (the nouns of
        all of the user's tweets are merged into a set first).

        Args:
            params: mapping read for ``'lang'`` (forwarded to
                ``Util.get_nouns``) and ``'mincount'`` (words whose global
                count is below this value are dropped from ``global_tf``).

        Returns:
            dict with three entries:
            ``'tf'`` maps ``tuple(location)`` to ``{word: user count}``;
            ``'global_tf'`` maps each kept word to its count divided by the
            total number of (user, word) observations;
            ``'lf'`` maps ``tuple(location)`` to the number of (user, word)
            observations at that location.
            Note that ``'tf'`` is not pruned by ``'mincount'``.
        """
        tf = {}  # term frequency in a location
        lf = {}  # location frequency
        global_tf = {}  # term frequency
        all_tf = 0.0

        for user in self.users.iter():
            location = user['location_point']
            if location is None:
                continue
            location = tuple(location)  # lists are unhashable; tuples can be dict keys

            local_tf = tf.setdefault(location, {})
            lf.setdefault(location, 0)

            user_words = set()
            for tweet in self.tweets.get(user['id']):
                user_words.update(Util.get_nouns(tweet['text'], params['lang']))

            for w in user_words:
                local_tf[w] = local_tf.get(w, 0) + 1
                global_tf[w] = global_tf.get(w, 0) + 1
            lf[location] += len(user_words)
            all_tf += len(user_words)

        # Iterate over a snapshot of the keys: deleting from a dict while
        # iterating its live key view raises RuntimeError on Python 3.
        for w in list(global_tf):
            if global_tf[w] < params['mincount']:
                del global_tf[w]
            else:
                global_tf[w] /= all_tf

        return {'tf': tf, 'global_tf': global_tf, 'lf': lf}
开发者ID:yamaguchiyuto,项目名称:location_inference,代码行数:36,代码来源:kinsella.py

示例3: infer

# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
    def infer(self, qtree):
        """Process the tweet stream, then assign locations to unlabeled users.

        Phase 1 walks ``self.tweets.stream()``. For each dict item carrying a
        ``'timestamp'`` key whose author is known to ``self.users``:
        tweets of users with a location are passed to ``self.updateKL``
        together with the quadtree area id of that location (skipped when the
        quadtree returns no area); tweets of users without a location are
        passed to ``self.updateUD``.

        Phase 2 walks all users and, for every user that still has no
        location but has an entry in ``self.ud``, stores the center of the
        predicted quadtree leaf in ``user['location']``.

        Args:
            qtree: object providing ``get_area_id(location)`` and a ``leaves``
                sequence whose items expose ``center``.

        Returns:
            None. Results are written into the user records in place.
        """
        n = self.params['N']
        self.ud = UserDistribution(n)
        self.wd = WordDistribution(n)
        self.window = Window(n)
        # The original read a bare ``params`` here, which is not defined in
        # this method; every sibling line uses ``self.params``.
        self.kl = KL(n, self.population)

        for tweet in self.tweets.stream():
            if not (isinstance(tweet, dict) and 'timestamp' in tweet):
                continue
            user = self.users.get(tweet['user_id'])
            if user is None:
                continue
            cl = user['location']  # location of the user who posted this tweet
            words = set(Util.get_nouns(tweet['text'], self.params['lang']))  # words contained in this tweet
            if cl is not None:
                # labeled user
                aid = qtree.get_area_id(cl)
                if aid is not None:
                    self.updateKL(user, aid, words)
            else:
                # unlabeled user
                self.updateUD(user, words, self.params['dmin'])

        # Location prediction using user distribution
        for user in self.users.iter():
            if user['location'] is not None:
                continue
            # unlabeled user
            ud = self.ud.get(user['id'])
            if ud is not None:
                # at least one observation
                inferred_location_number = self.predict(ud)
                user['location'] = qtree.leaves[inferred_location_number].center
开发者ID:eaglebh,项目名称:olim,代码行数:32,代码来源:olim.py

示例4: extract_local_words

# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
    def extract_local_words(self, tweets, params):
        """Select words whose per-city user distribution passes the divergence check.

        Steps:
          1. For every noun in ``tweets``, collect the set of user ids that
             used it.
          2. For every (word, user) pair where the user is known to
             ``self.users`` and has a non-None ``location_point``, count the
             user under the city label ``str(self.model.predict([location])[0])``.
          3. For words whose total count reaches ``params['cmin']``, compute
             ``self.calc_divergence`` and keep the word when
             ``self.check_divergence`` accepts it.

        Args:
            tweets: iterable of mappings with ``'text'`` and ``'user_id'``.
            params: mapping read for ``'lang'`` and ``'cmin'``; it is also
                forwarded to ``calc_divergence`` and ``check_divergence``.

        Returns:
            ``Words`` built from ``{word: {'word', 'd', 'distribution'}}``.
        """
        # making user sets
        user_sets = {}
        for tweet in tweets:
            for w in Util.get_nouns(tweet['text'], params['lang']):
                user_sets.setdefault(w, set()).add(tweet['user_id'])

        # making word distributions
        word_counts = {}
        for w, user_ids in user_sets.items():
            for user_id in user_ids:
                user = self.users.get(user_id)
                if user is None:
                    continue
                location = user['location_point']
                if location is None:
                    continue
                # labeled user
                city = str(self.model.predict([location])[0])
                per_city = word_counts.setdefault(w, {})
                per_city[city] = per_city.get(city, 0) + 1

        # calculating divergences
        lwords = {}
        for w, distribution in word_counts.items():
            total = float(sum(distribution.values()))
            if total < params['cmin']:
                continue
            d = self.calc_divergence(total, distribution, params)
            # assumes check_divergence returns a bool — TODO confirm
            if self.check_divergence(d, params):
                lwords[w] = {'word': w, 'd': d, 'distribution': distribution}  # save as dict
        return Words(lwords)
开发者ID:yamaguchiyuto,项目名称:location_inference,代码行数:35,代码来源:olim.py

示例5: update_user_distributions

# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
 def update_user_distributions(self, tweet, params):
     """Fold the local words of one tweet into its author's distribution.

     Nothing happens unless the author is known to ``self.users`` and has no
     ``location_point`` (an unlabeled user). For such a user an initial
     distribution is created on first sight, then every noun of the tweet
     that ``self.lwords`` contains and whose ``'d'`` is strictly below
     ``params['dmax']`` has its ``'distribution'`` merged in through
     ``self.add_distribution``.

     Args:
         tweet: mapping with ``'user_id'`` and ``'text'``.
         params: mapping read for ``'lang'`` and ``'dmax'``.

     Returns:
         None. ``self.user_distributions`` is updated in place.
     """
     user = self.users.get(tweet['user_id'])
     if user is None or user['location_point'] is not None:
         # Unknown author, or a labeled user whose location needs no inference.
         return

     # unlabeled users
     uid = user['id']
     if uid not in self.user_distributions:
         self.user_distributions[uid] = self.init_user_distribution()

     for w in Util.get_nouns(tweet['text'], params['lang']):
         if not self.lwords.contain(w):
             continue
         # update using local word
         lword = self.lwords.get(w)
         if lword['d'] < params['dmax']:
             self.user_distributions[uid] = self.add_distribution(
                 self.user_distributions[uid], lword['distribution'])
开发者ID:yamaguchiyuto,项目名称:location_inference,代码行数:16,代码来源:cheng.py

示例6: infer_one

# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
    def infer_one(self, user_id, params):
        """Return the location in ``self.model['lf']`` with the highest likelihood.

        The nouns of all of the user's tweets are merged into one set and
        scored against every candidate location with
        ``self.tweet_likelihood``. The first candidate with the strictly
        greatest score wins.

        Args:
            user_id: key passed to ``self.tweets.get``.
            params: mapping read for ``'lang'``; also forwarded to
                ``tweet_likelihood``.

        Returns:
            The winning location key, or None when there are no candidates or
            no score exceeds the -10000000000 starting value.
        """
        nouns = set()
        for tweet in self.tweets.get(user_id):
            nouns.update(Util.get_nouns(tweet['text'], params['lang']))

        best_location, best_score = None, -10000000000
        for candidate in self.model['lf']:
            score = self.tweet_likelihood(nouns, candidate, params)
            if best_score < score:
                best_location, best_score = candidate, score
        return best_location
开发者ID:yamaguchiyuto,项目名称:location_inference,代码行数:16,代码来源:kinsella.py

示例7: calgari

# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
    def calgari(self, params):
        """Build per-city user counts and per-word city counts, pruned by CALGARI score.

        Each user with a non-None ``location_point`` adds one to the count of
        the city ``str(tuple(location))`` and at most one to each
        (word, city) count, since the nouns of all of the user's tweets are
        merged into a set first.

        A word is dropped from the model when its total count is below
        ``params['minu']``, or when its score
        ``(max city count / word count) / (word count / total count)`` does
        not rank within the top ``params['max_count']``.

        Args:
            params: mapping read for ``'lang'``, ``'minu'`` and
                ``'max_count'``.

        Returns:
            dict with ``'pc'`` (``{city: user count}``) and ``'pwc'``
            (``{word: {city: user count}}`` for the surviving words).
        """
        all_tf = 0.0
        tf = {}
        pc = {}
        pwc = {}

        for user in self.users.iter():
            location = user['location_point']
            if location is None:
                continue
            city = str(tuple(location))
            pc[city] = pc.get(city, 0) + 1

            user_words = set()
            for tweet in self.tweets.get(user['id']):
                user_words.update(Util.get_nouns(tweet['text'], params['lang']))

            for w in user_words:
                per_city = pwc.setdefault(w, {})
                per_city[city] = per_city.get(city, 0) + 1
                tf[w] = tf.get(w, 0.0) + 1  # kept as float so the divisions below are true divisions
            all_tf += len(user_words)

        # calculating calgari value
        calgari_value = {}
        for w in tf:
            if tf[w] < params['minu']:
                del pwc[w]
            else:
                max_prob = max(pwc[w].values())
                calgari_value[w] = (max_prob / tf[w]) / (tf[w] / all_tf)

        # building model: keep only the max_count best-scoring words
        ranked = sorted(calgari_value.items(), key=lambda x: x[1], reverse=True)
        for rank, (w, _) in enumerate(ranked, 1):
            if rank > params['max_count']:
                pwc.pop(w, None)

        return {'pc': pc, 'pwc': pwc}
开发者ID:yamaguchiyuto,项目名称:location_inference,代码行数:48,代码来源:hecht.py

示例8: infer_one

# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
    def infer_one(self, user_id):
        """Return the highest-scoring city of the model as a tuple.

        Every city starts from its ``self.model['pc']`` value, which is then
        multiplied by each ``self.model['pwc'][word][city]`` entry. The first
        city with the strictly greatest positive score is converted with
        ``Util.str_to_tuple`` and returned.

        Args:
            user_id: key passed to ``self.tweets.get``.

        Returns:
            Whatever ``Util.str_to_tuple`` yields for the winning city key
            (it is called with None when no city has a score above 0).
        """
        # NOTE(review): ``params`` is not a parameter of this method, so it
        # must resolve from an enclosing/module scope — confirm it exists.
        noun_freq = {}
        for tweet in self.tweets.get(user_id):
            for noun in Util.get_nouns(tweet['text'], params['lang']):
                noun_freq[noun] = noun_freq.get(noun, 0) + 1
        # NOTE(review): noun_freq is not read below; the scoring loop walks
        # every word of the model regardless of this user — confirm intent.

        scores = {}
        for word in self.model['pwc']:
            for city, weight in self.model['pwc'][word].items():
                if city not in scores:
                    scores[city] = self.model['pc'][city]
                scores[city] *= weight

        best_city, best_prob = None, 0
        for city, prob in scores.items():
            if best_prob < prob:
                best_city, best_prob = city, prob
        return Util.str_to_tuple(best_city)
开发者ID:yamaguchiyuto,项目名称:location_inference,代码行数:23,代码来源:hecht.py

示例9: update_user_distributions

# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
 def update_user_distributions(self, tweets, tlwords, params):
     """Fold local and temporally-local words of tweets into user distributions.

     Only tweets whose author is known to ``self.users`` and has no
     ``location_point`` (an unlabeled user) are used. For such a user an
     initial distribution is created on first sight; then, for every noun of
     the tweet:

     * if ``tlwords`` contains it, its ``'distribution'`` is merged in
       unconditionally;
     * if ``self.lwords`` contains it, its ``'distribution'`` is merged in
       when ``'d'`` passes the ``params['dmin']`` threshold — ``>=`` for the
       ``'l2'`` and ``'kl'`` divergences, ``<=`` for any other setting.

     Args:
         tweets: iterable of mappings with ``'user_id'`` and ``'text'``.
         tlwords: object providing ``contain(word)`` and ``get(word)``.
         params: mapping read for ``'lang'``, ``'divergence'`` and ``'dmin'``.

     Returns:
         None. ``self.user_distributions`` is updated in place.
     """
     for tweet in tweets:
         user = self.users.get(tweet['user_id'])
         if user is None or user['location_point'] is not None:
             # Unknown author, or a labeled user whose location needs no inference.
             continue

         # unlabeled users
         uid = user['id']
         if uid not in self.user_distributions:
             self.user_distributions[uid] = self.init_user_distribution()

         for w in Util.get_nouns(tweet['text'], params['lang']):
             if tlwords.contain(w):
                 # update using temporally-local word
                 tlword = tlwords.get(w)
                 self.user_distributions[uid] = self.add_distribution(
                     self.user_distributions[uid], tlword['distribution'])
             if self.lwords.contain(w):
                 # update using local word
                 lword = self.lwords.get(w)
                 # The comparison direction depends on the divergence setting.
                 if params['divergence'] in ['l2', 'kl']:
                     passes = lword['d'] >= params['dmin']
                 else:
                     passes = lword['d'] <= params['dmin']
                 if passes:
                     self.user_distributions[uid] = self.add_distribution(
                         self.user_distributions[uid], lword['distribution'])
开发者ID:yamaguchiyuto,项目名称:location_inference,代码行数:25,代码来源:olim.py


注:本文中的lib.util.Util.get_nouns方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。