本文整理汇总了 Python 中 recsys.algorithm.factorize.SVD.set_data 方法的典型用法代码示例。如果您正苦于以下问题:Python SVD.set_data 方法的具体用法?Python SVD.set_data 怎么用?有哪些 Python SVD.set_data 的使用示例?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 recsys.algorithm.factorize.SVD 的用法示例。
在下文中一共展示了 SVD.set_data 方法的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_classifier
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import set_data [as 别名]
def test_classifier(model, filename=None, itemkey="track", selector="SELECT * FROM train"):
    """Print the mean RMSE of an SVD model over ten random 70/30 splits.

    Every round re-runs ``selector`` against ``db.sqlite``, shuffles the rows,
    fits a fresh SVD on the first 70% and scores the remaining 30% with
    ``predict_item``. After the last round the mean RMSE and the total number
    of scored pairs are printed.

    :param model: not used by this function; kept so existing callers still work.
    :param filename: optional saved model, loaded into each fresh SVD before
        training when given.
    :param itemkey: row key used as the item id when predicting.
    :param selector: SQL query whose rows expose ``rating``, ``track`` and
        ``user`` (plus ``itemkey``).
    """
    rounds = 10
    train_share = 0.7

    conn = sqlite3.connect("db.sqlite")
    try:
        # dict_factory is defined elsewhere; presumably it maps each row to a
        # dict, since rows are indexed by column name below -- TODO confirm.
        conn.row_factory = dict_factory
        cur = conn.cursor()

        rmse_sum = 0
        rounds_done = 0
        scored_pairs = 0
        for _ in range(rounds):
            svd = SVD()
            if filename is not None:
                svd.load_model(filename)

            rows = list(cur.execute(selector))
            random.shuffle(rows)
            cut = int(len(rows) * train_share)

            # NOTE(review): training always uses the "track" column while
            # prediction uses ``itemkey``; confirm this is intended when
            # itemkey != "track".
            svd.set_data([(x["rating"], x["track"], x["user"]) for x in rows[:cut]])
            svd.compute(k=1000, min_values=0.0, pre_normalize=None,
                        mean_center=True, post_normalize=True)

            pairs = [
                (predict_item(svd, row[itemkey], row["user"]), row["rating"])
                for row in rows[cut:]
            ]
            scored_pairs += len(pairs)
            rmse_sum += RMSE(pairs).compute()
            rounds_done += 1.0
            print("iteration")

        print("%s %s" % (rmse_sum / rounds_done, scored_pairs))
    finally:
        # The original never closed the connection; release it even when a
        # round raises.
        conn.close()
示例2: train_and_save
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import set_data [as 别名]
def train_and_save(filename):
    """Load a saved SVD model for ``filename`` or train and save a new one.

    The text after the last ``.`` in ``filename`` is used as the ``step``
    suffix of the model name ``svdn_model_<step>``. The ratings file is always
    loaded and split 80/20; the training part is only used when the saved
    model cannot be opened.

    :param filename: path of a ``::``-separated ratings file.
    """
    step = filename.split('.')[-1]
    data = Data()
    # Renamed from ``format`` so the builtin is not shadowed.
    load_format = {'col': 1, 'row': 0, 'value': 2, 'ids': 'str'}
    data.load(filename, sep='::', format=load_format)
    train, _test = data.split_train_test(percent=80)
    try:
        svd = SVD('svdn_model_{step}.zip'.format(step=step))
    except Exception:
        # Any failure to open the saved model means it has to be rebuilt.
        # ``Exception`` (instead of a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate.
        svd = SVD()
        svd.set_data(train)
        svd.compute(
            k=100,
            min_values=2,
            pre_normalize=False,
            mean_center=True,
            post_normalize=True,
            savefile='svdn_model_{step}'.format(step=step)
        )
        print('Saved svdn_model_{step}.zip'.format(step=step))
    else:
        print('Already exists: svdn_model_{step}.zip'.format(step=step))
示例3: compute_SVD
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import set_data [as 别名]
def compute_SVD():
    """Fit an SVD with k=100 on ``load_data()`` and save it as 'ratings'.

    The model is written under the directory returned by
    ``utils.get_add_dir()``.
    """
    factorizer = SVD()
    ratings = load_data()
    factorizer.set_data(ratings)
    rank = 100
    factorizer.compute(
        k=rank,
        min_values=10,
        pre_normalize=None,
        mean_center=True,
        post_normalize=True,
        savefile=None,
    )
    target = os.path.join(utils.get_add_dir(), 'ratings')
    factorizer.save_model(target)
示例4: main
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import set_data [as 别名]
def main():
    """Fit an SVD on 'randUser/rate1.csv' and print five recommendations for id 1.

    The same CSV is loaded into both a train and a test ``Data`` object; only
    the train set is used here. An RMSE/MAE evaluation over the test set was
    previously sketched in this function but is disabled.
    """
    svd = SVD()
    train = Data()
    test = Data()
    for dataset in (train, test):
        # A fresh format dict per call, exactly as two separate load() calls.
        dataset.load(
            'randUser/rate1.csv',
            force=True,
            sep=',',
            format={'col': 0, 'row': 1, 'value': 2, 'ids': int},
        )

    svd.set_data(train)
    svd.compute(k=100, min_values=0.5, pre_normalize=False,
                mean_center=True, post_normalize=True)

    recommendations = svd.recommend(1, n=5, only_unknowns=False, is_row=False)
    print(recommendations)
示例5: evaluate
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import set_data [as 别名]
def evaluate(data, count=5, K=100):
    """Run ``count`` random train/test evaluations of an SVD model.

    Each round splits ``data`` with the module-level ``PERCENT_TRAIN``, fits an
    SVD of rank ``K`` on the training part and accumulates RMSE and MAE over
    the test ratings. Test ratings whose prediction raises ``KeyError`` are
    skipped.

    :param data: data set offering ``split_train_test`` and ``get``.
    :param count: number of independent rounds.
    :param K: number of dimensions passed to ``svd.compute``.
    :return: list with one ``{"RMSE": ..., "MAE": ...}`` dict per round whose
        metrics could be computed.
    """
    results = []
    for _ in range(count):
        train, test = data.split_train_test(percent=PERCENT_TRAIN)
        print("%s %s %s" % (len(data.get()), len(train.get()), len(test.get())))

        svd = SVD()
        svd.set_data(train)
        svd.compute(k=K, min_values=5, pre_normalize=None,
                    mean_center=True, post_normalize=True)

        # Evaluation using prediction-based metrics
        rmse = RMSE()
        mae = MAE()
        for rating, item_id, user_id in test.get():
            try:
                pred_rating = svd.predict(item_id, user_id)
                rmse.add(rating, pred_rating)
                mae.add(rating, pred_rating)
            except KeyError:
                continue

        try:
            rsu = {"RMSE": rmse.compute(), "MAE": mae.compute()}
        except Exception:
            # Best effort: a round whose metrics cannot be computed is reported
            # and skipped. ``Exception`` (not a bare ``except``) keeps Ctrl-C
            # and SystemExit working.
            print("one error....++++++++++++++++++++++++++++++++++++++++++++++++++++")
        else:
            print(rsu)
            results.append(rsu)
    return results
示例6: recommended_files
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import set_data [as 别名]
def recommended_files(data,user):
    """Return up to 11 tth ids recommended for ``user``.

    An SVD is fitted on ``data``; files owned by the users it reports as
    similar (skipping the first entry of ``svd.similar``, presumably ``user``
    itself -- TODO confirm) and not already owned by ``user`` are scored with
    ``svd.predict`` and ranked by descending score. Candidates whose name is
    ``similar`` to an already accepted name are dropped.

    :param data: ratings passed to ``SVD.set_data``.
    :param user: id of the user to recommend for.
    :return: list of tth ids, best first; empty when nothing qualifies.
    """
    svd = SVD()
    svd.set_data(data)
    svd.compute(k=1000, min_values=0, pre_normalize=None,
                mean_center=False, post_normalize=True)
    similar_users = [entry[0] for entry in svd.similar(user)]

    # A set gives O(1) "already seen" checks; it starts with the user's own files.
    seen_tths = set(doc['tth'] for doc in db.user_list.find({'user': user}))
    candidates = []
    for neighbour in similar_users[1:]:
        for doc in db.user_list.find({'user': neighbour}):
            tth = doc['tth']
            if tth in seen_tths:
                continue
            name = db.tths.find_one({'tth': tth})['name']
            seen_tths.add(tth)
            candidates.append((name, tth, svd.predict(user, tth)))
    candidates.sort(key=lambda cand: cand[2], reverse=True)

    # The original stopped as soon as more than 10 results were collected.
    max_results = 11
    res = []
    kept_names = []
    for name, tth, _score in candidates:
        # Compare against the names of accepted files. The original compared
        # against r[0], i.e. the first character of an accepted tth, so the
        # duplicate-name filter never worked as intended.
        if any(similar(name, kept) for kept in kept_names):
            continue
        res.append(tth)
        kept_names.append(name)
        if len(res) >= max_results:
            break
    # Always return the list; the original fell through and returned None
    # whenever fewer than 11 results were found.
    return res
示例7: ex1
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import set_data [as 别名]
def ex1(dat_file='./ml-1m/ratings.dat',
        pct_train=0.5):
    """Train an SVD on a ratings file and print RMSE and MAE on the held-out part.

    :param dat_file: ``::``-separated ratings file.
    :param pct_train: value forwarded as ``percent`` to ``split_train_test``.
    """
    # NOTE(review): other callers in this collection pass percent=80, so the
    # 0.5 default may be read as 0.5% rather than 50% -- confirm against recsys.
    ratings = Data()
    ratings.load(dat_file, sep='::',
                 format={'col': 0, 'row': 1, 'value': 2, 'ids': int})

    # Split, then factorize the training part.
    train, test = ratings.split_train_test(percent=pct_train)
    rank = 100
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=rank, min_values=5, pre_normalize=None,
                mean_center=True, post_normalize=True)

    # Score every held-out rating the model can predict.
    rmse, mae = RMSE(), MAE()
    for actual, item_id, user_id in test.get():
        try:
            predicted = svd.predict(item_id, user_id)
            rmse.add(actual, predicted)
            mae.add(actual, predicted)
        except KeyError:
            # Skip this held-out rating.
            pass

    print('RMSE=%s' % rmse.compute())
    print('MAE=%s' % mae.compute())
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import set_data [as 别名]
def SVDtrain2(data,pct_train):
    """Split ``data`` and fit an SVD with k=100 on the training part.

    :param data: data set offering ``split_train_test``.
    :param pct_train: value forwarded as ``percent`` to the split.
    :return: ``(svd, train, test)``.
    """
    split = data.split_train_test(percent=pct_train)
    train, test = split
    model = SVD()
    model.set_data(train)
    settings = dict(k=100, min_values=5, pre_normalize=None,
                    mean_center=True, post_normalize=True)
    model.compute(**settings)
    return model, train, test
示例9: recommended_files
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import set_data [as 别名]
def _magnet_link(tth):
    """Return a magnet URI for ``tth``, with the display name appended when it resolves."""
    link = 'magnet:?xt=urn:tree:tiger:' + tth
    try:
        return link + "&dn=" + unidecode.unidecode(db.tths.find_one({'tth': tth})['name'])
    except Exception:
        # Best effort: fall back to the bare link when the name lookup or its
        # transliteration fails.
        return link


def recommended_files(user):
    """Return up to 11 magnet links recommended for ``user``.

    Steps:
      1. If the user is not yet flagged as ``recommended`` in
         ``db.done_users``, append their ``user::tth`` pairs to
         './dc_recom.dat' and set the flag.
      2. Fit an SVD on that file and take the 10 most similar users.
      3. Refit the SVD on the files of those users only and ask it for 100
         recommendations unknown to ``user``.
      4. Drop recommendations whose name is ``similar`` to an already accepted
         one, keeping at most 11.

    :param user: user id; non-``str`` values are passed through ``unidecode``.
    :return: list of magnet URI strings.
    """
    if not isinstance(user, str):
        user = unidecode.unidecode(user)

    # ``== False`` kept on purpose: only an explicit false flag triggers the export.
    if db.done_users.find_one({'user': user})['recommended'] == False:
        user_files = db.user_list.find({'user': user})
        # ``with`` closes the file even if a write fails midway.
        with open('./dc_recom.dat', 'a') as out:
            for entry in user_files:
                out.write(entry['user'] + '::' + entry['tth'])
                out.write('\n')
        db.done_users.update({'user': user}, {'user': user, 'recommended': True})

    data = Data()
    data.load('./dc_recom.dat', sep='::', format={'col': 1, 'row': 0})
    svd = SVD()
    svd.set_data(data)
    svd.compute(k=1000, min_values=0, pre_normalize=None,
                mean_center=False, post_normalize=True)
    similar_users = [entry[0] for entry in svd.similar(user, n=10)]

    # Second pass: restrict the matrix to the files of the similar users.
    newdata = Data()
    for neighbour in similar_users:
        for doc in db.user_list.find({'user': neighbour}):
            newdata.add_tuple((1.0, neighbour, doc['tth']))
    svd.set_data(newdata)
    svd.compute(k=1000, min_values=0, pre_normalize=None,
                mean_center=False, post_normalize=True)
    recoms = svd.recommend(user, is_row=True, only_unknowns=True, n=100)

    res = []
    for rec in recoms:
        is_duplicate = any(
            similar(db.tths.find_one({'tth': rec[0]})['name'],
                    db.tths.find_one({'tth': kept[0]})['name'])
            for kept in res
        )
        if is_duplicate:
            continue
        res.append(rec)
        # Same cut-off as before: stop once more than 10 were accepted.
        if len(res) > 10:
            break

    return [_magnet_link(rec[0]) for rec in res]
示例10: build_model
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import set_data [as 别名]
def build_model(self,uids,kn):
    """Fit an SVD on the user/song pairs in ``uids`` and store it on ``self.model``.

    :param uids: mapping of user id to an iterable of song ids.
    :param kn: value passed as ``k`` to ``svd.compute``.
    """
    dataset = Data()
    # Every (user, song) pair is added with the constant value 1.
    triples = (
        (1, song, uid)
        for uid, songs in uids.items()
        for song in songs
    )
    for triple in triples:
        dataset.add_tuple(triple)

    model = SVD()
    model.set_data(dataset)
    model.compute(k=kn, min_values=1)
    self.model = model
示例11: train_svd
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import set_data [as 别名]
def train_svd(data):
    """
    Fit a Singular Value Decomposition with k=30 on the matrix built by
    ``get_data_model_matrix(data)``.

    :param data: input handed to ``get_data_model_matrix``.
    :return: the computed SVD model
    """
    model = SVD()
    matrix = get_data_model_matrix(data)
    model.set_data(matrix)
    model.compute(
        k=30,
        min_values=0,
        pre_normalize=None,
        mean_center=True,
        post_normalize=True,
    )
    return model
示例12: calculate_stats_features
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import set_data [as 别名]
def calculate_stats_features(pct_train):
    """Fit an SVD with k=100 on a split of 'feature_matrix.csv'.

    No mean-centering or post-normalisation is applied.

    :param pct_train: value forwarded as ``percent`` to ``split_train_test``.
    :return: ``(svd, train, test)``.
    """
    features = Data()
    features.load(
        'feature_matrix.csv',
        sep=',',
        format={'col': 0, 'row': 1, 'value': 2, 'ids': int},
    )
    train, test = features.split_train_test(percent=pct_train)

    model = SVD()
    model.set_data(train)
    rank = 100
    model.compute(k=rank, min_values=0, pre_normalize=None,
                  mean_center=False, post_normalize=False)
    return model, train, test
示例13: create_svd_model
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import set_data [as 别名]
def create_svd_model(train):
    """Build an SVD model with k=100 from the ``train`` data set.

    :param train: data passed to ``SVD.set_data``.
    :return: the computed SVD model.
    """
    compute_options = {
        'k': 100,
        'min_values': 0,
        'pre_normalize': None,
        'mean_center': True,
        'post_normalize': True,
    }
    model = SVD()
    model.set_data(train)
    model.compute(**compute_options)
    return model
示例14: __init__
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import set_data [as 别名]
def __init__(self):
    """Attach a new SVD over an empty ``Data`` set and reset the iteration counter."""
    dataset = Data()
    self.filename = "emag"

    # Reusing a saved "<filename>.zip" model is switched off by the constant
    # False (which also short-circuits the file check), so a fresh SVD is
    # always built. No compute() is run here.
    reuse_saved_model = False and os.path.isfile(self.filename + ".zip")
    if reuse_saved_model:
        model = SVD(filename=self.filename)
    else:
        model = SVD()
        model.set_data(dataset)

    self.svd = model
    self.iterations = 0
示例15: build_svd_item_based
# 需要导入模块: from recsys.algorithm.factorize import SVD [as 别名]
# 或者: from recsys.algorithm.factorize.SVD import set_data [as 别名]
def build_svd_item_based(user_op_item_cnt, item_op_users, user_idx, item_idx, min_nonzero):
    """Collect the qualifying user/item counts into an SVD data set.

    A (user, item) count is kept only when the user has at least
    ``min_nonzero`` entries, ``item_op_users[item]`` is at least
    ``min_nonzero`` and the count itself is at least 1. No ``compute`` is run
    on the returned SVD.

    :param user_op_item_cnt: per-user mapping of item to count.
    :param item_op_users: per-item value compared against ``min_nonzero``.
    :param user_idx: mapping of user to the id stored in the data set.
    :param item_idx: mapping of item to the id stored in the data set.
    :param min_nonzero: threshold applied to both users and items.
    :return: ``(svd, item_lst)``, with ``item_lst`` holding the distinct kept items.
    """
    svd = SVD()
    data = Data()
    kept_items = []
    for user in user_op_item_cnt:
        if not len(user_op_item_cnt[user]) >= min_nonzero:
            continue
        for item in user_op_item_cnt[user]:
            if item_op_users[item] < min_nonzero:
                continue
            weight = 1.0 * user_op_item_cnt[user][item]
            if weight < 1:
                continue
            kept_items.append(item)
            data.add_tuple((weight, item_idx[item], user_idx[user]))

    item_lst = list(set(kept_items))
    svd.set_data(data)
    return svd, item_lst