本文整理汇总了Python中lib.util.Util.get_nouns方法的典型用法代码示例。如果您正苦于以下问题:Python Util.get_nouns方法的具体用法?Python Util.get_nouns怎么用?Python Util.get_nouns使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类lib.util.Util
的用法示例。
在下文中一共展示了Util.get_nouns方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: extract_local_words_batch
# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
def extract_local_words_batch(self, params):
    """Extract local words from all labeled users' tweets in one batch.

    For each user with a known location, collect the set of nouns across
    their tweets and count, per city, how many users mentioned each word.
    Words mentioned by at least ``params['cmin']`` users and whose
    dispersion is below ``params['dmax']`` are kept as local words.
    """
    word_counts = {}  # word -> {city: number of users mentioning it}
    for user in self.users.iter():
        location = user['location_point']
        if location is None:
            continue
        city = str(tuple(location))
        nouns = set()
        for tweet in self.tweets.get(user['id']):
            nouns.update(Util.get_nouns(tweet['text'], params['lang']))
        for word in nouns:
            per_city = word_counts.setdefault(word, {})
            per_city[city] = per_city.get(city, 0) + 1
    # calculating divergences
    lwords = {}
    for word, dist in word_counts.items():
        total = float(sum(dist.values()))
        if total >= params['cmin']:
            d = self.calc_dispersion(dist, params)
            if d < params['dmax']:
                lwords[word] = {'word': word, 'd': d, 'distribution': dist}  # save as dict
    return Words(lwords)
示例2: learn
# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
def learn(self, params):
    """Build a location language model from labeled users' tweets.

    Counts, per location, how many users mention each noun, drops words
    mentioned fewer than ``params['mincount']`` times overall, and
    normalizes the surviving global counts into frequencies.

    Returns a dict with:
      tf        -- {location: {word: user count}}
      global_tf -- {word: fraction of all (user, word) mentions}
      lf        -- {location: number of labeled users}
    """
    tf = {}         # term frequency in a location
    lf = {}         # location frequency
    global_tf = {}  # term frequency
    all_tf = 0.0
    for user in self.users.iter():
        location = user['location_point']
        if location is None:
            continue
        location = tuple(location)
        tf.setdefault(location, {})
        lf.setdefault(location, 0)
        user_words = set()
        for tweet in self.tweets.get(user['id']):
            user_words |= set(Util.get_nouns(tweet['text'], params['lang']))
        for w in user_words:
            tf[location][w] = tf[location].get(w, 0) + 1
            global_tf[w] = global_tf.get(w, 0) + 1
            all_tf += 1  # total (user, word) mentions -- normalizer below
        # NOTE(review): indentation was lost in this snippet; lf is read as
        # a per-location *user* count (incremented once per user) -- confirm
        # against the original repository.
        lf[location] += 1
    # BUG FIX: the original deleted entries while iterating
    # ``global_tf.keys()``, which raises RuntimeError in Python 3
    # ("dictionary changed size during iteration"); iterate a snapshot.
    for w in list(global_tf.keys()):
        if global_tf[w] < params['mincount']:
            del global_tf[w]
        else:
            global_tf[w] /= all_tf
    return {'tf': tf, 'global_tf': global_tf, 'lf': lf}
示例3: infer
# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
def infer(self, qtree):
    """Infer locations for unlabeled users from the tweet stream.

    First pass over the stream: tweets from labeled users update the
    per-area KL statistics; tweets from unlabeled users update per-user
    distributions. Second pass over all users: each unlabeled user with at
    least one observation is assigned the center of the predicted leaf.
    """
    self.ud = UserDistribution(self.params['N'])
    self.wd = WordDistribution(self.params['N'])
    self.window = Window(self.params['N'])
    # BUG FIX: the original read the bare name ``params`` here (a NameError
    # unless a module-level global happened to exist); every other access in
    # this method uses ``self.params``, so use it here too.
    self.kl = KL(self.params['N'], self.population)
    for tweet in self.tweets.stream():
        if not (isinstance(tweet, dict) and 'timestamp' in tweet):
            continue
        user = self.users.get(tweet['user_id'])
        if user is None:
            continue
        cl = user['location']  # user who posts this tweet
        # words contained in this tweet
        words = set(Util.get_nouns(tweet['text'], self.params['lang']))
        if cl is not None:
            # labeled user (area id computed once, not twice as before)
            aid = qtree.get_area_id(cl)
            if aid is not None:
                self.updateKL(user, aid, words)
        else:
            # unlabeled user
            self.updateUD(user, words, self.params['dmin'])
    # Location prediction using user distribution
    for user in self.users.iter():
        if user['location'] is None:
            # unlabeled user
            ud = self.ud.get(user['id'])
            if ud is not None:
                # at least one observation
                leaf_number = self.predict(ud)
                user['location'] = qtree.leaves[leaf_number].center
示例4: extract_local_words
# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
def extract_local_words(self, tweets, params):
    """Extract local words from a batch of tweets.

    Builds, for each word, the set of users that mentioned it; counts the
    labeled users per predicted city; and keeps the words that pass the
    frequency (``cmin``) and divergence checks.
    """
    # making user sets: word -> set of user ids that used it
    user_sets = {}
    for tweet in tweets:
        for noun in Util.get_nouns(tweet['text'], params['lang']):
            user_sets.setdefault(noun, set()).add(tweet['user_id'])
    # making word distributions: word -> {city: labeled-user count}
    word_counts = {}
    for word, user_ids in user_sets.items():
        for user_id in user_ids:
            user = self.users.get(user_id)
            if user is None:
                continue
            location = user['location_point']
            if location is None:
                continue
            # labeled user
            dist = word_counts.setdefault(word, {})
            city = str(self.model.predict([location])[0])
            dist[city] = dist.get(city, 0) + 1
    # calculating divergences
    lwords = {}
    for word, dist in word_counts.items():
        total = float(sum(dist.values()))
        if total >= params['cmin']:
            d = self.calc_divergence(total, dist, params)
            if self.check_divergence(d, params):
                lwords[word] = {'word': word, 'd': d, 'distribution': dist}  # save as dict
    return Words(lwords)
示例5: update_user_distributions
# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
def update_user_distributions(self, tweet, params):
    """Update the posting user's distribution with one tweet's local words.

    Only unlabeled users (no ``location_point``) are updated; each local
    word with dispersion below ``params['dmax']`` contributes its city
    distribution.
    """
    user = self.users.get(tweet['user_id'])
    if user is None or user['location_point'] is not None:
        return
    # unlabeled users
    uid = user['id']
    if uid not in self.user_distributions:
        self.user_distributions[uid] = self.init_user_distribution()
    for w in Util.get_nouns(tweet['text'], params['lang']):
        if not self.lwords.contain(w):
            continue
        # update using local word
        lword = self.lwords.get(w)
        if lword['d'] < params['dmax']:
            self.user_distributions[uid] = self.add_distribution(
                self.user_distributions[uid], lword['distribution'])
示例6: infer_one
# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
def infer_one(self, user_id, params):
    """Return the maximum-likelihood location for one user's tweet words."""
    user_words = set()
    for tweet in self.tweets.get(user_id):
        user_words.update(Util.get_nouns(tweet['text'], params['lang']))
    best_location = None
    best_likelihood = -10000000000
    for location in self.model['lf']:
        likelihood = self.tweet_likelihood(user_words, location, params)
        # strict comparison: ties keep the earlier location
        if likelihood > best_likelihood:
            best_likelihood = likelihood
            best_location = location
    return best_location
示例7: calgari
# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
def calgari(self, params):
    """Score words with the Calgari metric and keep the top scorers.

    Counts, per city, how many labeled users used each word, drops words
    used by fewer than ``params['minu']`` users, scores the rest, and keeps
    only the ``params['max_count']`` best-scoring words in the model.

    Returns {'pc': {city: user count}, 'pwc': {word: {city: user count}}}.
    """
    all_tf = 0.0
    tf = {}             # word -> total mention count (float)
    calgari_value = {}  # word -> calgari score
    pc = {}             # city -> labeled-user count
    pwc = {}            # word -> {city: user count}
    for user in self.users.iter():
        location = user['location_point']
        if location is None:
            continue
        city = str(tuple(location))
        pc[city] = pc.get(city, 0) + 1
        nouns = set()
        for tweet in self.tweets.get(user['id']):
            nouns.update(Util.get_nouns(tweet['text'], params['lang']))
        for w in nouns:
            if w not in pwc:
                pwc[w] = {}
                tf[w] = 0.0
            pwc[w][city] = pwc[w].get(city, 0) + 1
            all_tf += 1
            tf[w] += 1
    # calculating calgari value (deleting from pwc while iterating tf is safe)
    for w in tf:
        if tf[w] < params['minu']:
            del pwc[w]
        else:
            max_prob = max(pwc[w].values())
            calgari_value[w] = (max_prob / tf[w]) / (tf[w] / all_tf)
    # building model: drop everything past the max_count best scores
    ranked = sorted(calgari_value.items(), key=lambda item: item[1], reverse=True)
    for rank, (w, _score) in enumerate(ranked, start=1):
        if rank > params['max_count'] and w in pwc:
            del pwc[w]
    return {'pc': pc, 'pwc': pwc}
示例8: infer_one
# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
def infer_one(self, user_id, params):
    """Predict one user's location with a naive-Bayes style argmax.

    BUG FIXES vs. the original:
    - ``params`` was read as an unbound name (NameError unless a global
      happened to exist); it is now an explicit parameter, matching the
      sibling ``infer_one(self, user_id, params)`` implementation.
    - ``user_words`` was built but never read: the scoring loop ran over
      every word in the model, so the prediction ignored the user's tweets
      and was identical for all users. Scores now use only words the user
      actually wrote (and that exist in the model).
    """
    user_words = {}  # word -> occurrence count in this user's tweets
    for tweet in self.tweets.get(user_id):
        for w in Util.get_nouns(tweet['text'], params['lang']):
            user_words[w] = user_words.get(w, 0) + 1
    city_probs = {}
    for w in user_words:
        if w not in self.model['pwc']:
            continue
        for city in self.model['pwc'][w]:
            if city not in city_probs:
                city_probs[city] = self.model['pc'][city]  # city prior count
            city_probs[city] *= self.model['pwc'][w][city]
    max_city = None
    max_prob = 0
    for city, prob in city_probs.items():
        if prob > max_prob:
            max_prob = prob
            max_city = city
    # NOTE(review): if no word matched, max_city is None and str_to_tuple
    # receives None -- same behavior as the original; confirm callers cope.
    return Util.str_to_tuple(max_city)
示例9: update_user_distributions
# 需要导入模块: from lib.util import Util [as 别名]
# 或者: from lib.util.Util import get_nouns [as 别名]
def update_user_distributions(self, tweets, tlwords, params):
    """Update unlabeled users' distributions from a batch of tweets.

    Each temporally-local word contributes its distribution
    unconditionally; each local word contributes only when its divergence
    passes the ``dmin`` threshold, whose direction depends on the
    divergence type ('l2'/'kl' keep high values, others keep low ones).
    """
    for tweet in tweets:
        user = self.users.get(tweet['user_id'])
        if user is None or user['location_point'] is not None:
            continue
        # unlabeled users
        uid = user['id']
        if uid not in self.user_distributions:
            self.user_distributions[uid] = self.init_user_distribution()
        for w in Util.get_nouns(tweet['text'], params['lang']):
            if tlwords.contain(w):
                # update using temporally-local word
                tlword = tlwords.get(w)
                self.user_distributions[uid] = self.add_distribution(
                    self.user_distributions[uid], tlword['distribution'])
            if self.lwords.contain(w):
                # update using local word
                lword = self.lwords.get(w)
                if params['divergence'] in ('l2', 'kl'):
                    passes = lword['d'] >= params['dmin']
                else:
                    passes = lword['d'] <= params['dmin']
                if passes:
                    self.user_distributions[uid] = self.add_distribution(
                        self.user_distributions[uid], lword['distribution'])