本文整理汇总了Python中recsys.algorithm.factorize.SVD.predict方法的典型用法代码示例。如果您正苦于以下问题：Python SVD.predict方法的具体用法？Python SVD.predict怎么用？Python SVD.predict使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类recsys.algorithm.factorize.SVD的用法示例。
在下文中一共展示了SVD.predict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: evaluate
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import predict [as 别名]
def evaluate(data, count=5, K=100):
    """Evaluate an SVD model over several random train/test splits.

    Each round re-splits ``data`` (using the module-level ``PERCENT_TRAIN``),
    computes an SVD with ``K`` factors on the training part and accumulates
    RMSE and MAE over the test ratings for which a prediction is available.

    :param data: dataset object exposing ``split_train_test()`` and ``get()``.
    :param count: number of split/evaluate rounds to run.
    :param K: value forwarded as ``k`` to ``svd.compute``.
    :return: list of ``{"RMSE": ..., "MAE": ...}`` dicts, one for every round
        whose metrics could be computed.
    """
    results = []
    for _ in range(count):
        train, test = data.split_train_test(percent=PERCENT_TRAIN)
        sizes = (len(data.get()), len(train.get()), len(test.get()))
        print(" ".join(str(size) for size in sizes))

        svd = SVD()
        svd.set_data(train)
        svd.compute(k=K, min_values=5, pre_normalize=None,
                    mean_center=True, post_normalize=True)

        # Evaluation using prediction-based metrics
        rmse = RMSE()
        mae = MAE()
        for rating, item_id, user_id in test.get():
            try:
                pred_rating = svd.predict(item_id, user_id)
                rmse.add(rating, pred_rating)
                mae.add(rating, pred_rating)
            except KeyError:
                # Presumably the item or user is missing from the trained
                # model; such test ratings are skipped.
                continue

        try:
            rsu = {}
            rsu["RMSE"] = rmse.compute()
            rsu["MAE"] = mae.compute()
            print(rsu)
            results.append(rsu)
        except Exception:
            # Was a bare ``except:``, which also swallowed KeyboardInterrupt
            # and SystemExit. A failed round is still only reported and
            # left out of the results.
            print("one error....++++++++++++++++++++++++++++++++++++++++++++++++++++")
    return results
示例2: ex1
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import predict [as 别名]
def ex1(dat_file='./ml-1m/ratings.dat',
        pct_train=0.5):
    """Train an SVD model on a ratings file and print its RMSE and MAE.

    :param dat_file: path of the ``::``-separated ratings file to load.
    :param pct_train: value forwarded as ``percent`` to
        ``Data.split_train_test``.
        NOTE(review): recsys appears to expect a percentage in 0-100 here;
        confirm that the default of 0.5 is intended.
    """
    ratings = Data()
    ratings.load(dat_file, sep='::',
                 format={'col':0, 'row':1, 'value':2,'ids':int})

    # Hold out part of the ratings for evaluation.
    train_set, test_set = ratings.split_train_test(percent=pct_train)

    # Factorize the training matrix.
    model = SVD()
    model.set_data(train_set)
    model.compute(k=100, min_values=5, pre_normalize=None,
                  mean_center=True, post_normalize=True)

    # Compare every predictable test rating with its prediction.
    rmse_metric, mae_metric = RMSE(), MAE()
    for actual, item, user in test_set.get():
        try:
            estimate = model.predict(item, user)
            rmse_metric.add(actual, estimate)
            mae_metric.add(actual, estimate)
        except KeyError:
            # No prediction available for this (item, user) pair.
            continue

    print('RMSE=%s' % rmse_metric.compute())
    print('MAE=%s' % mae_metric.compute())
示例3: recommended_files
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import predict [as 别名]
def recommended_files(data, user):
    """Recommend files owned by similar users that ``user`` does not have.

    An SVD model is computed on ``data``; every file (identified by its
    ``tth``) listed in ``db.user_list`` for a similar user, and not already
    listed for ``user``, is scored with ``svd.predict``. Candidates are
    ranked by predicted score and near-duplicates (per the module-level
    ``similar`` on their names) are dropped.

    :param data: dataset handed to ``SVD.set_data``.
    :param user: id of the user to recommend for.
    :return: list of at most 11 ``tth`` values, best prediction first.
        The list is now also returned when fewer than 11 candidates exist
        (the original fell off the end and returned ``None`` in that case).
    """
    svd = SVD()
    svd.set_data(data)
    svd.compute(k=1000, min_values=0, pre_normalize=None,
                mean_center=False, post_normalize=True)
    similar_users = [entry[0] for entry in svd.similar(user)]

    # tths already owned by the user; extended while scanning so that every
    # candidate file is scored only once.
    seen_tths = set(row['tth'] for row in db.user_list.find({'user': user}))

    candidates = []
    # The first entry is skipped, presumably because it is the user itself.
    for other_user in similar_users[1:]:
        for row in db.user_list.find({'user': other_user}):
            tth = row['tth']
            if tth in seen_tths:
                continue
            movie_name = db.tths.find_one({'tth': tth})['name']
            seen_tths.add(tth)
            candidates.append((movie_name, tth, svd.predict(user, tth)))
    candidates.sort(key=lambda candidate: candidate[2], reverse=True)

    res = []
    chosen_names = []
    for movie_name, tth, _score in candidates:
        # The original compared the name against ``r[0]`` of an already
        # stored tth, i.e. a single character; compare names with names.
        if any(similar(movie_name, chosen) for chosen in chosen_names):
            continue
        chosen_names.append(movie_name)
        res.append(tth)
        if len(res) > 10:
            break
    return res
示例4: get_mae_rmse
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import predict [as 别名]
def get_mae_rmse(step):
    """Compute MAE and RMSE of a stored SVD model on a fresh test split.

    Loads ``second_train_test.dat.<step>``, splits it 80/20 and evaluates
    the model stored in ``svdn_model_<step>.zip`` on the test part.

    :param step: suffix identifying both the data file and the model file.
    :return: ``(mae, rmse)``; either value is ``np.nan`` when no test rating
        could be predicted. Returns ``None`` when the model cannot be
        loaded.
    """
    data = Data()
    # Named ``load_format`` so the builtin ``format`` is not shadowed.
    load_format = {'col': 1, 'row': 0, 'value': 2, 'ids': 'str'}
    filename = 'second_train_test.dat.{step}'.format(step=step)
    data.load(filename, sep='::', format=load_format)
    _train, test = data.split_train_test(percent=80)

    try:
        svd = SVD('svdn_model_{step}.zip'.format(step=step))
        print('Loading model... {step}'.format(step=step))
    except Exception:
        # Was a bare ``except:``; still best-effort, but no longer hides
        # KeyboardInterrupt/SystemExit.
        return None

    # (actual, predicted) pairs shared by both metrics.
    predictions = []
    for rating, item_id, user_id in test:
        try:
            predictions.append((rating, svd.predict(item_id, user_id)))
        except Exception:
            # Ratings the model cannot predict are skipped.
            pass

    mae_value, rmse_value = np.nan, np.nan
    if predictions:
        mae_value = MAE(predictions).compute()
        rmse_value = RMSE(predictions).compute()
    return mae_value, rmse_value
示例5: evaulte
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import predict [as 别名]
def evaulte(train_set, test_set):
    """Train an SVD on ``train_set`` and return half of the MAE on ``test_set``.

    Uses the module-level ``KKK`` and ``MIN_ITEM`` as ``k`` and
    ``min_values`` for ``svd.compute``. Test ratings whose prediction raises
    ``KeyError`` are counted and skipped; the count and both dataset sizes
    are printed.

    :param train_set: dataset used to compute the SVD.
    :param test_set: dataset whose ratings are predicted.
    :return: ``mae.compute() / 2.0``.
    """
    model = SVD()
    model.set_data(train_set)
    model.compute(k=KKK, min_values=MIN_ITEM, pre_normalize=None,
                  mean_center=True, post_normalize=True)

    mae_metric = MAE()
    missing = 0
    for actual, item, user in test_set.get():
        try:
            estimate = model.predict(item, user)
            mae_metric.add(actual, estimate)
        except KeyError:
            missing += 1

    report = ("k_err", missing, " -- ", "test-len: ", len(test_set.get()),
              "train-len: ", len(train_set.get()))
    print(" ".join(str(part) for part in report))
    return mae_metric.compute() / 2.0
示例6: calculate_stats_users
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import predict [as 别名]
def calculate_stats_users(pct_train):
    """Print RMSE and MAE of an SVD model trained on ``user_data_working.csv``.

    :param pct_train: value forwarded as ``percent`` to
        ``Data.split_train_test``.
    """
    ratings = Data()
    ratings.load('user_data_working.csv', sep=',',
                 format={'col':0, 'row':1, 'value':2,'ids':int})
    train_set, test_set = ratings.split_train_test(percent=pct_train)

    model = SVD()
    model.set_data(train_set)
    model.compute(k=100, min_values=2, pre_normalize=None, mean_center=True,
                  post_normalize=False)

    rmse_metric, mae_metric = RMSE(), MAE()
    for actual, item, user in test_set.get():
        try:
            estimate = model.predict(item, user)
            rmse_metric.add(actual, estimate)
            mae_metric.add(actual, estimate)
        except KeyError:
            # No prediction available for this pair; leave it out.
            continue

    print('RMSE=%s' % rmse_metric.compute())
    print('MAE=%s\n' % mae_metric.compute())
示例7: ex1
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import predict [as 别名]
def ex1(dat_file='ml-1m/ratings.dat',
        pct_train=0.5):
    """Train an SVD model on a ratings file and print its RMSE and MAE.

    :param dat_file: path of the ``::``-separated ratings file.
    :param pct_train: value forwarded as ``percent`` to
        ``Data.split_train_test``.
    """
    # How the ``format`` mapping is read:
    #   'row': 1    -> matrix rows come from column 1 of the ratings file
    #   'col': 0    -> matrix columns come from column 0 of the ratings file
    #   'value': 2  -> matrix values (Mij) come from column 2
    #   'ids': int  -> row and column ids are integers, not strings
    layout = {'col':0, 'row':1, 'value':2,
              'ids':int}
    dataset = Data()
    dataset.load(dat_file, sep='::', format=layout)

    # Train/test split.
    training, held_out = dataset.split_train_test(percent=pct_train)

    # Build and compute the SVD model.
    factorization = SVD()
    factorization.set_data(training)
    factorization.compute(k=100, min_values=5, pre_normalize=None,
                          mean_center=True, post_normalize=True)

    # Measure prediction error on the held-out ratings.
    rmse_metric = RMSE()
    mae_metric = MAE()
    for true_rating, item, user in held_out.get():
        try:
            guess = factorization.predict(item, user)
            rmse_metric.add(true_rating, guess)
            mae_metric.add(true_rating, guess)
        except KeyError:
            continue

    print('RMSE=%s' % rmse_metric.compute())
    print('MAE=%s' % mae_metric.compute())
示例8: quickstart
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import predict [as 别名]
def quickstart():
    """Walk through the basic SVD workflow on the MovieLens 1M ratings.

    Loads ``ml-1m/ratings.dat``, computes a 100-factor SVD, asks for items
    similar to item 1, prints the predicted and the stored rating for
    user 1 / item 1 and finally requests recommendations for item 1.

    The results of ``svd.similar`` and ``svd.recommend`` are not used; the
    calls are kept because verbose mode is switched on.
    """
    svd = SVD()
    recsys.algorithm.VERBOSE = True

    # load movielens data
    dat_file = 'ml-1m/ratings.dat'
    svd.load_data(filename=dat_file, sep='::',
                  format={'col':0, 'row':1, 'value':2, 'ids': int})

    # compute svd
    # (the leftover ``pdb.set_trace()`` that followed was removed: it
    # stopped every non-interactive run at a debugger prompt)
    svd.compute(k=100, min_values=10, pre_normalize=None, mean_center=True,
                post_normalize=True)

    # get movies similar to toy story
    ITEMID1 = 1  # toy story
    svd.similar(ITEMID1)

    # get predicted rating for given user & movie
    MIN_RATING = 0.0
    MAX_RATING = 5.0
    USERID = 1
    ITEMID = 1
    pred = svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING)
    actual = svd.get_matrix().value(ITEMID, USERID)
    print('predicted rating = {0}'.format(pred))
    print('actual rating = {0}'.format(actual))

    # which users should see Toy Story?
    svd.recommend(ITEMID)
示例9: Recommender
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import predict [as 别名]
class Recommender():
    """SVD-based recommender holding a train and a test dataset.

    The SVD object is created and fed with ``train`` in ``__init__``;
    nothing in this class calls ``svd.compute``, so the caller presumably
    does that on ``self.svd`` before predicting or recommending.
    """

    def __init__(self, train, test):
        """Store both datasets and attach ``train`` to a new SVD object."""
        recsys.algorithm.VERBOSE = True
        self.train = train
        self.test = test
        self.svd = SVD()
        self.svd.set_data(train)

    def set_train(self, train):
        """Replace the stored training dataset (the SVD is not updated)."""
        self.train = train

    def set_test(self, test):
        """Replace the stored test dataset."""
        self.test = test

    def get_train(self):
        """Return the stored training dataset."""
        return self.train

    def get_test(self):
        """Return the stored test dataset."""
        return self.test

    def _unique_in_order(self, values):
        """Return the distinct ``values`` as a list, first occurrence first.

        Uses a set for membership, so ids are assumed to be hashable.
        """
        seen = set()
        unique = []
        for value in values:
            if value not in seen:
                seen.add(value)
                unique.append(value)
        return unique

    def get_alluserid(self, dataset):
        """Return the distinct user ids of ``dataset`` in order of appearance."""
        return self._unique_in_order(
            user_id for _rating, _item_id, user_id in dataset.get())

    def get_allitemid(self, dataset):
        """Return the distinct item ids of ``dataset`` in order of appearance."""
        return self._unique_in_order(
            item_id for _rating, item_id, _user_id in dataset.get())

    def eval_rmse(self):
        """Print RMSE and MAE of the model over the test ratings.

        Test ratings whose prediction raises ``KeyError`` are skipped.
        """
        # Evaluation using prediction-based metrics
        rmse = RMSE()
        mae = MAE()
        for rating, item_id, user_id in self.test.get():
            try:
                pred_rating = self.svd.predict(item_id, user_id)
                rmse.add(rating, pred_rating)
                mae.add(rating, pred_rating)
            except KeyError:
                continue
        print('RMSE=%s' % rmse.compute())
        print('MAE=%s' % mae.compute())

    def recommend(self, N=10, only_unknowns=False, is_row=True):
        """Collect recommendations for every test user known from training.

        :param N: number of recommendations requested per user.
        :param only_unknowns: accepted for compatibility; the call below
            always passes ``only_unknowns=False``.
        :param is_row: accepted for compatibility; the call below always
            passes ``is_row=False``.
        :return: dict mapping user id to the result of ``svd.recommend``.
        """
        # Built once: the original rebuilt the whole user list for every
        # test rating.
        train_users = set(self.get_alluserid(self.train))
        rec_list = {}
        for _rating, _item_id, user_id in self.test.get():
            if user_id in train_users:
                rec_list[user_id] = self.svd.recommend(
                    user_id, n=N, only_unknowns=False, is_row=False)
                print(rec_list[user_id])
        return rec_list

    def precisionRecall(self, rec_list2, test_dict):
        """Compute recall and precision of top-30 recommendations.

        :param rec_list2: unused; kept for interface compatibility.
        :param test_dict: mapping ``user -> {item: ...}`` of held-out items.
        :return: ``[recall, precision]``. A ratio whose denominator is zero
            (for example when no test user occurs in the training data) is
            reported as ``0.0`` instead of raising ``ZeroDivisionError``.
        """
        print("Start calculate precision and recall...")
        train_users = set(self.get_alluserid(self.train))
        hit = 0
        n_recall = 0
        n_precision = 0
        for user, items in test_dict.items():
            if user not in train_users:
                continue
            rec_list = self.svd.recommend(user, n=30, only_unknowns=False, is_row=False)
            r = [entry[0] for entry in rec_list]
            print(" ".join(['rec_list', str(r)]))
            hit += len(set(r) & set(items.keys()))
            n_recall += len(items)
            n_precision += 30
        recall = hit / float(n_recall) if n_recall else 0.0
        precision = hit / float(n_precision) if n_precision else 0.0
        return [recall, precision]
示例10: print
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import predict [as 别名]
# Excerpt of a debugging script: dumps the similarity result as JSON and,
# in the part that follows, exercises predict/value/recommend on ``svd``.
# ``similaries``, ``svd`` and ``ITEMID1`` are defined before this excerpt.
print(json.dumps(similaries, ensure_ascii=False))
# import pdb;pdb.set_trace()
import sys
# NOTE(review): as written (no enclosing condition is visible in this
# excerpt) the script terminates here, so none of the statements below run.
sys.exit(0)
print(svd.similar(ITEMID1))
# Returns: <ITEMID, Cosine Similarity Value>
MIN_RATING = 0.0
MAX_RATING = 1.0
ITEMID = 109
# NOTE(review): integer literal; its digits look like hex-encoded text, so
# this may have been meant as a string id - confirm against the data file.
USERID = 3837663637323963363639393565373833613237396534393132376338386362
print('testing..')
print(svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING))
# Predicted value 5.0
print(svd.get_matrix().value(ITEMID, USERID))
# Real value 5.0
# Recommend (non-rated) movies to a user:
print('recommend to user')
print(svd.recommend(USERID, is_row=False)) #cols are users and rows are items, thus we set is_row=False
print(svd.recommend(ITEMID))
# NOTE(review): leftover interactive breakpoint.
import pdb;pdb.set_trace()
示例11: RMSE
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import predict [as 别名]
# Evaluate two already configured models, ``svd`` and ``svd_neig``, on the
# same test ratings and accumulate RMSE/MAE for each of them.
import math

# Compute SVD
svd.compute(k=K, min_values=None, pre_normalize=None, mean_center=True, post_normalize=True)
svd_neig.compute(k=K, min_values=None, pre_normalize=None, mean_center=True, post_normalize=True)

# Evaluate
rmse_svd = RMSE()
mae_svd = MAE()
rmse_svd_neig = RMSE()
mae_svd_neig = MAE()

i = 1  # 1-based counter of successfully processed test ratings
total = len(test.get())
print("Total Test ratings: %s" % total)
for rating, item_id, user_id in test:
    try:
        pred_rating_svd = svd.predict(item_id, user_id)
        rmse_svd.add(rating, pred_rating_svd)
        mae_svd.add(rating, pred_rating_svd)

        pred_rating_svd_neig = svd_neig.predict(item_id, user_id)  # Koren & co.
        # ``is not nan`` only recognises one particular NaN object; any
        # other NaN would slip through and poison the metrics.
        if not math.isnan(pred_rating_svd_neig):
            rmse_svd_neig.add(rating, pred_rating_svd_neig)
            mae_svd_neig.add(rating, pred_rating_svd_neig)

        # Progress indicator rewritten in place on a single console line.
        sys.stdout.write("\rProcessed test rating %d" % i)
        sys.stdout.flush()
        i += 1
    except KeyError:
        # Raised for ratings the models cannot predict; skip them.
        continue
示例12: get_name_item_reviewed
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import predict [as 别名]
# Notebook-style cells: bare expressions are kept because an interactive
# session displays their value.
#3.10
[items_full[str(x[0])].get_data() for x in films]
#3.11
get_name_item_reviewed(10,user_full,items_full)
#3.12
items_full[str(2628)].get_data()
users_for_star_wars = svd.recommend(2628,only_unknowns=True)
users_for_star_wars
#3.13
movies_reviewed_by_sw_rec =[get_name_item_reviewed(x[0],user_full,items_full) for x in users_for_star_wars]
movies_flatten = [movie for movie_list in movies_reviewed_by_sw_rec for movie in movie_list]
movie_aggregate = movies_by_category(movies_flatten, 3)
movies_sort = sorted(movie_aggregate,key=lambda x: x[1], reverse=True)
movies_sort
#3.14
from recsys.evaluation.prediction import RMSE
err = RMSE()
for rating, item_id, user_id in data.get():
    try:
        prediction = svd.predict(item_id, user_id)
        err.add(rating, prediction)
    except KeyError:
        # ``except KeyError, k`` bound an unused name and is a syntax error
        # on Python 3; unpredictable ratings are still skipped.
        continue
print('RMSE is ' + str(err.compute()))
示例13: __init__
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import predict [as 别名]
class Recommender:
def __init__(self, datafile_path=None):
    """Create the SVD object and immediately load the local data file.

    :param datafile_path: path handed to ``load_local_data`` together with
        ``K=100`` and ``min_values=0``.
        NOTE(review): the load is attempted even when this is ``None``.
    """
    self.datafile_path = datafile_path
    self.matrix = None
    self.predict_matrix = None
    self.svd = SVD()
    self.load_local_data(self.datafile_path, 100, 0)
def load_web_data(self, filename, film_names_with_rate_list, K, min_values,
                  MAX_COUNT_USER_FILMS=None, MAX_COUNT_FILM_USERS=None):
    """Build the rating matrix from film titles, save it and recompute.

    :param filename: file the rating matrix is saved to; also stored as
        ``self.datafile_path``.
    :param film_names_with_rate_list: forwarded to
        ``create_matrix_by_film_titles``.
    :param K: forwarded to ``__compute_matrix``.
    :param min_values: forwarded to ``__compute_matrix``.
    :param MAX_COUNT_USER_FILMS: first argument of ``rm.MatrixCreator``.
    :param MAX_COUNT_FILM_USERS: second argument of ``rm.MatrixCreator``.
    """
    creator = rm.MatrixCreator(MAX_COUNT_USER_FILMS, MAX_COUNT_FILM_USERS)
    self.matrix = creator.create_matrix_by_film_titles(film_names_with_rate_list)
    self.matrix.save_rating_matrix_as_file(filename)
    self.datafile_path = filename
    self.__compute_matrix(K, min_values)
def load_local_data(self, filename, K, min_values):
    """Restore the rating matrix from ``filename`` and recompute the model.

    :param filename: file to restore from; also stored as
        ``self.datafile_path``.
    :param K: forwarded to ``__compute_matrix``.
    :param min_values: forwarded to ``__compute_matrix``.
    """
    creator = rm.MatrixCreator()
    self.matrix = creator.restore_from_file(filename)
    self.datafile_path = filename
    self.__compute_matrix(K, min_values)
def get_predictions_for_all_users(self, min_rate=1, max_rate=10, top = None, K=None, min_values=0):
    """Fill and return ``self.predict_matrix`` with a prediction per cell.

    :param min_rate: forwarded to ``svd.predict`` as ``MIN_VALUE``.
    :param max_rate: forwarded to ``svd.predict`` as ``MAX_VALUE``.
    :param top: unused by this method.
    :param K: when truthy, the model is recomputed with this value first.
    :param min_values: forwarded to ``__compute_matrix`` together with
        ``K`` (it used to be silently ignored).
    :return: array of shape ``(number of users, number of films)`` indexed
        by the values of the two index maps.
    """
    if K:
        self.__compute_matrix(K, min_values)
    users_map = self.matrix.users_indexes_map
    films_map = self.matrix.films_indexes_map
    self.predict_matrix = np.zeros((len(users_map), len(films_map)))
    for user in users_map.keys():
        user_index = users_map[user]
        for film in films_map.keys():
            film_index = films_map[film]
            self.predict_matrix[user_index][film_index] = self.svd.predict(
                user_index, film_index, MIN_VALUE=min_rate, MAX_VALUE=max_rate)
    return self.predict_matrix
def predict_for_user(self, user_index, min_rate=1, max_rate=10, top = None, repeat=False, K=None, min_values=None):
    """Predict a rate for every film for the user at ``user_index``.

    :param user_index: index of the user passed to ``svd.predict``.
    :param min_rate: forwarded to ``svd.predict`` as ``MIN_VALUE``.
    :param max_rate: forwarded to ``svd.predict`` as ``MAX_VALUE``.
    :param top: when truthy, only the ``top`` best-rated entries are
        returned, best first.
    :param repeat: when false, films found in the preferences of the first
        fake user are dropped.
        NOTE(review): that is the first fake user, not necessarily
        ``user_index`` - confirm this is intended.
    :param K: to change the number of properties (recomputes the model).
    :param min_values: forwarded to ``__compute_matrix`` when given.
    :return: ``{Film: rate, ...}`` when ``repeat`` is true and ``top`` is
        not set, otherwise ``[(Film, rate), ...]``.
    """
    if K:
        if min_values is None:
            self.__compute_matrix(K)
        else:
            self.__compute_matrix(K, min_values)

    prediction = {}
    np_matrix = self.matrix.get_rating_matrix()
    for index in range(np_matrix.shape[1]):
        rate = self.svd.predict(user_index, index,
                                MIN_VALUE=min_rate,
                                MAX_VALUE=max_rate)
        film = self.matrix.indexes_films_map[index]
        prediction[film] = rate

    if not repeat:
        # list(...)[0] works on Python 2 and 3 and still raises IndexError
        # when there is no fake user.
        fake_user_index = list(self.matrix.indexes_with_fake_user_ids.keys())[0]
        user = self.matrix.indexes_users_map[fake_user_index]
        films = user.get_preferences().keys()
        prediction = [(film, rate) for film, rate in prediction.items()
                      if film not in films]

    if top:
        # ``prediction`` is already a list of pairs when ``repeat`` is
        # false; calling ``.items()`` on it crashed with AttributeError.
        pairs = prediction.items() if isinstance(prediction, dict) else prediction
        ranked = sorted(pairs, key=operator.itemgetter(1))
        prediction = list(reversed(ranked[-top:]))
    return prediction
def predict_for_all_fake_users(self, min_rate=1, max_rate=10, top = None, K=None, min_values=0):
    """Run ``predict_for_user`` for every fake user.

    :param min_rate: forwarded to ``predict_for_user``.
    :param max_rate: forwarded to ``predict_for_user``.
    :param top: forwarded to ``predict_for_user``.
    :param K: to change the number of properties (recomputes the model).
    :param min_values: forwarded to ``__compute_matrix`` together with
        ``K`` (it used to be silently ignored).
    :return: list with one ``predict_for_user`` result per key of
        ``self.matrix.indexes_with_fake_user_ids``.
    """
    if K:
        self.__compute_matrix(K, min_values)
    return [self.predict_for_user(user_index, min_rate, max_rate, top)
            for user_index in self.matrix.indexes_with_fake_user_ids.keys()]
def predicted_rating_submatrix(self, user_indexes):
    """Return predicted rates for the given users as a 2-D array.

    The model is recomputed with ``K=100`` on every call. One row is
    produced per entry of ``user_indexes`` and one column per column of
    ``self.matrix.rating_matrix``; predictions are bounded with
    ``MIN_VALUE=1`` and ``MAX_VALUE=10``.

    :param user_indexes: iterable of user indexes to predict for.
    :return: array with ``len(user_indexes)`` rows.
    """
    self.__compute_matrix(100)
    film_count = self.matrix.rating_matrix.shape[1]
    # Placeholder first row so np.append has something to stack onto; it is
    # sliced off before returning.
    stacked = np.empty((1, film_count), int)
    for index in user_indexes:
        rates = [self.svd.predict(index, film_index,
                                  MIN_VALUE=1,
                                  MAX_VALUE=10)
                 for film_index in xrange(film_count)]
        stacked = np.append(stacked, [rates], axis=0)
    return stacked[1:]
def predicted_rating_submatrix_for_fake(self):
    """Return ``predicted_rating_submatrix`` for all fake user indexes."""
    fake_user_indexes = self.matrix.indexes_with_fake_user_ids.keys()
    return self.predicted_rating_submatrix(fake_user_indexes)
def __compute_matrix(self, K,
min_values=0,
pre_normalize=None,
#.........这里部分代码省略.........
示例14: print
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import predict [as 别名]
# Compute the model, then compare its predictions with the stored ratings
# for every record of the CSV file.
k = 100
svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True, post_normalize=True)

records = ETLUtils.load_csv_file(file_name_header, '|')
errors = []
for record in records:
    try:
        user = record['user']
        item = int(record['item'])
        predicted_rating = svd.predict(item, user, 1, 5)
        print(record['user'], record['item'], predicted_rating)
        actual_rating = svd.get_matrix().value(item, user)
        errors.append(abs(predicted_rating - actual_rating))
    except KeyError:
        # Records the model or matrix does not know are skipped.
        continue

mean_absolute_error = MeanAbsoluteError.compute_list(errors)
root_mean_square_error = RootMeanSquareError.compute_list(errors)
print('Mean Absolute error: %f' % mean_absolute_error)
print('Root mean square error: %f' % root_mean_square_error)
示例15: Data
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import predict [as 别名]
from recsys.algorithm.factorize import SVD
from recsys.evaluation.prediction import RMSE, MAE
import sys
# Dataset
data = Data()
data.load('./ml-1m/ratings.dat', sep='::', format={'col':0, 'row':1, 'value':2, 'ids':int})

# Load an already computed SVD model from /tmp
svd2 = SVD(filename='/tmp/movielens')

# Predict the rating of one user for one movie
USERID = 2
ITEMID = 1  # Toy Story
rating1 = svd2.predict(ITEMID, USERID, 0.0, 5.0)
print('Predicted rating=%f' % rating1)

# Look up the actual rating of that user for that movie; exit with a
# message when the dataset holds none.
for rating, item_id, user_id in data.get():
    if user_id == USERID and item_id == ITEMID:
        rat = rating
        break
else:
    sys.exit("No actual rating available")
print('Actual rating=%f' % rat)