本文整理汇总了 Python 中 lib.util.Util.get_words 方法的典型用法代码示例。如果您正苦于以下问题：Python Util.get_words 方法的具体用法？Python Util.get_words 怎么用？Python Util.get_words 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 lib.util.Util 的用法示例。
在下文中一共展示了 Util.get_words 方法的 3 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: extract_local_words_batch
# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_words [as 别名]
def extract_local_words_batch(self, params):
    """Extract local words from the tweets of every labeled user.

    Each user yielded by ``self.users.iter()`` whose ``'location_point'``
    is not ``None`` is mapped to a city label via ``self.model.predict``.
    Every distinct word found in that user's tweets (``self.tweets.get``)
    is then counted once for that city. A word is kept when its total
    count reaches ``params['cmin']`` and its divergence, computed by
    ``self.calc_divergence``, is accepted by ``self.check_divergence``.

    Args:
        params: Mapping that must provide ``'cmin'``. It is also forwarded
            unchanged to ``self.calc_divergence`` and
            ``self.check_divergence``.

    Returns:
        A ``Words`` object built from a dict keyed by word. Each value is
        a dict with the keys ``'word'``, ``'d'`` (the divergence) and
        ``'distribution'`` (the per-city counts).
    """
    lwords = {}
    word_counts = {}  # word -> {city label -> number of users}

    for user in self.users.iter():
        location = user['location_point']
        # Identity check: `!= None` would be evaluated element-wise if the
        # location were an array-like object.
        if location is None:
            continue
        city = str(self.model.predict([location])[0])

        # Collect the user's words into a set so that each word contributes
        # at most one count per user, however often it was tweeted.
        user_words = set()
        for tweet in self.tweets.get(user['id']):
            user_words.update(Util.get_words(tweet['text']))

        for w in user_words:
            city_counts = word_counts.setdefault(w, {})
            city_counts[city] = city_counts.get(city, 0) + 1

    # calculating divergences
    for w in word_counts:
        distribution = word_counts[w]
        N = float(sum(distribution.values()))
        if N >= params['cmin']:
            d = self.calc_divergence(N, distribution, params)
            # Explicit comparison kept: the return type of check_divergence
            # is not visible from here.
            if self.check_divergence(d, params) == True:
                lwords[w] = {'word': w, 'd': d, 'distribution': distribution}  # save as dict
    return Words(lwords)
示例2: extract_local_words
# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_words [as 别名]
def extract_local_words(self, tweets, params):
    """Extract local words from a given collection of tweets.

    The method works in three phases:

    1. For every word returned by ``Util.get_words`` on a tweet's text,
       collect the set of ``'user_id'`` values that used it.
    2. For every (word, user) pair whose user is known to ``self.users``
       and has a non-``None`` ``'location_point'``, map the location to a
       city label with ``self.model.predict`` and count one for that city.
    3. Keep the words whose total count reaches ``params['cmin']`` and
       whose divergence (``self.calc_divergence``) is accepted by
       ``self.check_divergence``.

    Args:
        tweets: Iterable of mappings providing ``'text'`` and ``'user_id'``.
        params: Mapping that must provide ``'cmin'``. It is also forwarded
            unchanged to ``self.calc_divergence`` and
            ``self.check_divergence``.

    Returns:
        A ``Words`` object built from a dict keyed by word. Each value is
        a dict with the keys ``'word'``, ``'d'`` (the divergence) and
        ``'distribution'`` (the per-city counts).
    """
    lwords = {}
    word_counts = {}  # word -> {city label -> number of labeled users}

    # making user sets
    user_sets = {}  # word -> set of user ids that used the word
    for tweet in tweets:
        for w in Util.get_words(tweet['text']):
            user_sets.setdefault(w, set()).add(tweet['user_id'])

    # making word distributions
    for w in user_sets:
        for user_id in user_sets[w]:
            user = self.users.get(user_id)
            if user is None:
                continue
            location = user['location_point']
            # Identity check: `!= None` would be evaluated element-wise if
            # the location were an array-like object.
            if location is None:
                continue
            # labeled user
            city_counts = word_counts.setdefault(w, {})
            city = str(self.model.predict([location])[0])
            city_counts[city] = city_counts.get(city, 0) + 1

    # calculating divergences
    for w in word_counts:
        distribution = word_counts[w]
        N = float(sum(distribution.values()))
        if N >= params['cmin']:
            d = self.calc_divergence(N, distribution, params)
            # Explicit comparison kept: the return type of check_divergence
            # is not visible from here.
            if self.check_divergence(d, params) == True:
                lwords[w] = {'word': w, 'd': d, 'distribution': distribution}  # save as dict
    return Words(lwords)
示例3: update_user_distributions
# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_words [as 别名]
def update_user_distributions(self, tweets, tlwords, params):
    """Update the distributions of unlabeled users touched by ``tweets``.

    For every tweet, the author and the author's neighbors
    (``self.get_neighbors``) are considered. Users that are unknown to
    ``self.users`` or that already have a ``'location_point'`` are
    skipped. For each remaining user, every word of the tweet that is
    contained in ``tlwords`` and/or ``self.lwords`` has its
    ``'distribution'`` merged into ``self.user_distributions[user['id']]``
    through ``self.add_distribution``.

    A word from ``self.lwords`` is only applied when its ``'d'`` value
    passes the ``params['dmin']`` threshold: ``d >= dmin`` when
    ``params['divergence']`` is ``'l2'`` or ``'kl'``, ``d <= dmin``
    otherwise. Words from ``tlwords`` are applied without a threshold.

    Args:
        tweets: Iterable of mappings providing ``'text'`` and ``'user_id'``.
        tlwords: Object exposing ``contain(word)`` and ``get(word)``; the
            value returned by ``get`` must provide ``'distribution'``.
        params: Mapping providing ``'divergence'`` and ``'dmin'``; it is
            also forwarded unchanged to ``self.get_neighbors``.

    Returns:
        None. ``self.user_distributions`` is updated in place.
    """
    for tweet in tweets:
        neighbors = self.get_neighbors(tweet['user_id'], params)
        # The author is always included alongside the neighbors.
        for user_id in neighbors | {tweet['user_id']}:
            user = self.users.get(user_id)
            # Only known users without a location are updated. Identity
            # checks avoid element-wise comparison on array-like locations.
            if user is None or user['location_point'] is not None:
                continue

            # unlabeled users
            uid = user['id']
            if uid not in self.user_distributions:
                self.user_distributions[uid] = self.init_user_distribution()

            for w in Util.get_words(tweet['text']):
                if tlwords.contain(w):
                    # update using temporally-local word
                    tlword = tlwords.get(w)
                    self.user_distributions[uid] = self.add_distribution(
                        self.user_distributions[uid], tlword['distribution'])

                if self.lwords.contain(w):
                    # update using local word
                    lword = self.lwords.get(w)
                    # The direction of the threshold test depends on the
                    # configured divergence measure.
                    if params['divergence'] in ['l2', 'kl']:
                        passes = lword['d'] >= params['dmin']
                    else:
                        passes = lword['d'] <= params['dmin']
                    if passes:
                        self.user_distributions[uid] = self.add_distribution(
                            self.user_distributions[uid], lword['distribution'])