本文整理匯總了Python中recommender.Recommender.calc_neighbors方法的典型用法代碼示例。如果您正苦於以下問題:Python Recommender.calc_neighbors方法的具體用法?Python Recommender.calc_neighbors怎麼用?Python Recommender.calc_neighbors使用的例子?那麼，這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類recommender.Recommender的用法示例。
在下文中一共展示了Recommender.calc_neighbors方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: run
# 需要導入模塊: from recommender import Recommender [as 別名]
# 或者: from recommender.Recommender import calc_neighbors [as 別名]
def run(source, target, num_topics=100, passes=20, lang='en', distance_measure=euclidean, percentage=0.05):
    """
    Main entry point for this package. Contains and executes the whole data pipeline.

    The pipeline reads one JSON document per line from ``source``, filters the
    reviews by language, tokenizes them, builds a dictionary and a
    document-term matrix, runs the LDA wrapper, averages the posteriors per
    key, and finally writes one ``<user>.json`` file per user containing the
    result of ``Recommender.calc_neighbors``.

    Arguments:
    source -- The path string to the source file containing all reviews
              (one JSON document per line)
    target -- The path string to the target directory where the neighbors for
              all users will be saved; an empty string writes to the current
              working directory

    Keyword arguments:
    num_topics -- The number of topics LDA is supposed to discover (default 100)
    passes -- The number of iterations for the statistical inference algorithm (default 20)
    lang -- The language the reviews are filtered by (default 'en')
    distance_measure -- A python function that measures the distance between two vectors
                        in a num_topics-dimensional vector space.
                        Must take two numpy arrays and return a float. (default euclidean)
    percentage -- The cutoff for being a close neighbor, i.e. two users are close if their
                  distance is within the closest percentage percent of all distances
                  (default 0.05)

    Returns nothing; the results are written to disk.
    """
    # Load the JSON-lines input: every line is parsed as its own document.
    with open(source) as f:
        all_reviews = [json.loads(line) for line in f]

    reviews = filter_by_language(all_reviews, lang)

    rt = ReviewTokenizer(reviews)
    rt.tokenize()

    db = DictionaryBuilder(rt.tokenized_docs)
    db.build()

    dtmb = DTMBuilder(db.dictionary, db.srcTexts)
    dtmb.build()

    ldaw = LDAWrapper(dtmb.dtm, db.dictionary)
    ldaw.run(num_topics=num_topics, passes=passes)

    modelwrapper = LDAModelWrapper(ldaw.ldamodel, db.dictionary, sortByUsers(rt.tokenized_docs))
    posteriors = modelwrapper.get_all_posteriors()

    # .items() instead of the Python-2-only .iteritems(): works on 2 and 3.
    means = {}
    for key, value in posteriors.items():
        means[key] = mean(value).tolist()

    x = Recommender(means)
    y = x.calc_distances(distance_measure)
    # NOTE(review): presumably fivePercent derives the distance cutoff for the
    # given percentage -- confirm against its definition.
    threshhold = fivePercent(y, percentage)

    # Iterating the dict directly replaces the Python-2-only .iterkeys().
    for user in means:
        # "threshhold" (sic) is the keyword name used by calc_neighbors.
        z = x.calc_neighbors(user, distance_measure, threshhold=threshhold)
        if target:
            fileName = target + '/' + user + '.json'
        else:
            fileName = user + '.json'
        with open(fileName, 'w') as g:
            json.dump(z, g)
示例2: euclidean
# 需要導入模塊: from recommender import Recommender [as 別名]
# 或者: from recommender.Recommender import calc_neighbors [as 別名]
import json
from ldamodelwrapper import LdaModelWrapper as LMW
from gensim import corpora
import os
import numpy as np
from recommender import Recommender
def euclidean(x, y):
    """Return the Euclidean (L2) distance between two vectors.

    Arguments:
    x -- first vector; a numpy array or any array-like (list, tuple)
    y -- second vector; same length as (or broadcastable against) x

    Returns the distance as a numpy floating-point scalar.
    """
    # Coerce to arrays so plain Python lists (e.g. vectors loaded from JSON)
    # support element-wise subtraction; ndarray inputs pass through unchanged.
    diff = np.asarray(x) - np.asarray(y)
    return np.sqrt(np.sum(diff ** 2))
# Collect the user ids handled by this partition: each line of the part file
# is a JSON object, and the first key of that object is taken as the user id.
userCurrPart = []
with open('parts/part5.json') as part_file:
    for line in part_file:
        dct = json.loads(line)
        # next(iter(...)) instead of dct.keys()[0]: dict views are not
        # subscriptable on Python 3, while this form works on 2 and 3.
        key = next(iter(dct))
        userCurrPart.append(key)

# Load the vectors that are handed to the Recommender.
with open('means.json') as means_file:
    means = json.load(means_file)

x = Recommender(means)

# Write one JSON file per user with the result of calc_neighbors.
for user in userCurrPart:
    # "threshhold" (sic) is the keyword name used by calc_neighbors.
    y = x.calc_neighbors(user, euclidean, threshhold = 0.21)
    with open('close_neighbors/close_neighbors_neighbors_' + user + '.json', 'w') as out_file:
        json.dump(y, out_file)
#neighbors[user] = y