本文整理汇总了Python中etl.ETLUtils.load_csv_file方法的典型用法代码示例。如果您正苦于以下问题：Python ETLUtils.load_csv_file方法的具体用法？Python ETLUtils.load_csv_file怎么用？Python ETLUtils.load_csv_file使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类etl.ETLUtils的用法示例。
在下文中一共展示了ETLUtils.load_csv_file方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_ml_100K_dataset
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import load_csv_file [as 别名]
def get_ml_100K_dataset(file_path='/Users/fpena/tmp/bpmf/ml-100k.csv'):
    """Load a tab-separated ratings file and convert its ratings to floats.

    Args:
        file_path: Location of the tab-separated file to load. The default
            is the path that used to be hard-coded here, so existing
            callers behave exactly as before; pass a different path (for
            example a smaller sample) instead of editing this function.

    Returns:
        The records produced by ``ETLUtils.load_csv_file``, with each
        record's ``overall_rating`` entry replaced in place by its float
        value.

    Raises:
        ValueError: If an ``overall_rating`` value cannot be parsed by
            ``float``.
    """
    records = ETLUtils.load_csv_file(file_path, '\t')
    for record in records:
        # Values are converted in place so callers can do arithmetic on them.
        record['overall_rating'] = float(record['overall_rating'])
    return records
示例2: parse_dafevara_file
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import load_csv_file [as 别名]
def parse_dafevara_file():
    """Rewrite the ``artists_names`` column as one line of tokens per record.

    The pipe-separated input file is loaded with
    ``ETLUtils.load_csv_file``. In every record's ``artists_names`` value,
    spaces are replaced by underscores and then semicolons are replaced by
    spaces. The transformed values are written, one per line, to
    ``user_artists.txt`` in the same folder.
    """
    field = 'artists_names'
    base_dir = '/Users/fpena/tmp/dafevara/'

    rows = ETLUtils.load_csv_file(
        base_dir + 'artists-names-by-userId.csv', '|')

    # Underscores go in first so that the spaces introduced by the second
    # replacement are the only spaces left in the value.
    for row in rows:
        row[field] = row[field].replace(' ', '_').replace(';', ' ')

    # The output file is only opened once every record has been transformed.
    with open(base_dir + 'user_artists.txt', 'w') as out:
        out.writelines('%s\n' % row[field] for row in rows)
示例3: add_extra_column_to_csv
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import load_csv_file [as 别名]
def add_extra_column_to_csv():
    """Add the ``Constants.FM_NUM_FACTORS_FIELD`` column to a results CSV.

    The source file is read twice: once through
    ``ETLUtils.load_csv_file`` to obtain the records, and once with
    ``csv.reader`` to obtain the header row in its original order. The new
    column name is inserted right after ``Evaluation_Set``, every record
    gets the value ``10`` for it, and the result is saved to
    ``/tmp/my_csv_file.csv`` with ``ETLUtils.save_csv_file``.

    Raises:
        ValueError: If the header row has no ``Evaluation_Set`` column.
    """
    source_path = '/tmp/results/rival_yelp_restaurant_results_folds_4.csv'
    rows = ETLUtils.load_csv_file(source_path)

    # Only the first row (the header) is needed from this second pass.
    with open(source_path, 'r') as source:
        column_names = next(csv.reader(source))

    position = column_names.index('Evaluation_Set') + 1
    column_names.insert(position, Constants.FM_NUM_FACTORS_FIELD)
    print(column_names)

    for row in rows:
        row[Constants.FM_NUM_FACTORS_FIELD] = 10

    ETLUtils.save_csv_file('/tmp/my_csv_file.csv', rows, column_names)
示例4: export_results
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import load_csv_file [as 别名]
def export_results(fold):
    """Copy the ``prediction`` column of a fold's predictions file to a text file.

    Args:
        fold: Value interpolated into ``Constants.RIVAL_RATINGS_FOLD_FOLDER``
            to obtain the folder that holds this fold's files.

    The input is ``<folder><recommender>-rating-predictions.txt`` (tab
    separated). The output is
    ``<folder>carskit_<recommender>_results_<type>.txt``, where ``<type>``
    is ``rating`` or ``ranking`` depending on
    ``Constants.RIVAL_EVALUATION_STRATEGY``. One prediction is written per
    line.

    Raises:
        KeyError: If ``Constants.RIVAL_EVALUATION_STRATEGY`` is not one of
            ``user_test``, ``test_items`` or ``rel_plus_n``.
    """
    algorithm = Constants.CARSKIT_RECOMMENDERS
    fold_dir = Constants.RIVAL_RATINGS_FOLD_FOLDER % fold

    kind_by_strategy = {
        'user_test': 'rating',
        'test_items': 'rating',
        'rel_plus_n': 'ranking',
    }
    output_kind = kind_by_strategy[Constants.RIVAL_EVALUATION_STRATEGY]

    source_path = fold_dir + algorithm + '-rating-predictions.txt'
    target_path = (
        fold_dir + 'carskit_' + algorithm + '_results_' + output_kind + '.txt'
    )

    rows = ETLUtils.load_csv_file(source_path, '\t')
    predictions = [row['prediction'] for row in rows]

    with open(target_path, 'w') as out:
        out.writelines("%s\n" % value for value in predictions)
示例5: get_ml_1m_dataset
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import load_csv_file [as 别名]
def get_ml_1m_dataset(
        file_path='/Users/fpena/UCC/Thesis/datasets/uncompressed/ml-1m.csv'):
    """Load a pipe-separated ratings file and convert its ratings to floats.

    Args:
        file_path: Location of the pipe-separated file to load. The default
            is the path that used to be hard-coded here, so existing
            callers behave exactly as before; pass a different path to
            load the data from elsewhere.

    Returns:
        The records produced by ``ETLUtils.load_csv_file``, with each
        record's ``overall_rating`` entry replaced in place by its float
        value.

    Raises:
        ValueError: If an ``overall_rating`` value cannot be parsed by
            ``float``.
    """
    records = ETLUtils.load_csv_file(file_path, '|')
    for record in records:
        # Values are converted in place so callers can do arithmetic on them.
        record['overall_rating'] = float(record['overall_rating'])
    return records
示例6: print
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import load_csv_file [as 别名]
# NOTE(review): this is a headerless excerpt; `svd` is created outside the
# visible lines and the original indentation was lost. The loop body and the
# try/except below are presumably nested as their order suggests -- confirm
# against the original source.
#
# Purpose: load a pipe-separated ratings file into `svd`, compute the
# factorization with k = 100, then collect into `errors` the absolute
# difference between the predicted and the stored value for every record of
# the header-bearing copy of the file.
# svd.load_data(filename=file_name, sep='::', format={'col':0, 'row':1, 'value':2, 'ids': int})
# `file_name` is passed to svd.load_data; `file_name_header` is read through
# ETLUtils.load_csv_file and accessed by the keys 'user' and 'item'.
file_name = '/Users/fpena/tmp/reviews.csv'
file_name_header = '/Users/fpena/tmp/reviews-header.csv'
# file_name = '/Users/fpena/tmp/small-reviews-matrix.csv'
# file_name_header = '/Users/fpena/tmp/small-reviews-header.csv'
# Field 0 is used as the matrix column, field 1 as the row and field 2 as the
# value; ids are kept as strings.
svd.load_data(filename=file_name, sep='|', format={'col':0, 'row':1, 'value':2, 'ids': str})
k = 100
svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True, post_normalize=True)
# predicted_rating = svd.predict(int(5), 'A1', 1, 10)
# predicted_rating2 = svd.predict(int(1), 'A1', 1, 10)
# print('Predicted rating', predicted_rating)
# print('Predicted rating', predicted_rating2)
records = ETLUtils.load_csv_file(file_name_header, '|')
# One absolute error per record that could be scored; nothing in the visible
# lines reads this list afterwards.
errors = []
for record in records:
try:
# print(record['user'], record['item'], record['rating'])
user = record['user']
# The item id is converted to int here although ids were loaded as str above
# -- NOTE(review): confirm this mismatch is intended.
item = int(record['item'])
# The trailing 1 and 5 are presumably the lower and upper bounds for the
# predicted rating -- TODO confirm against the svd library.
predicted_rating = svd.predict(item, user, 1, 5)
print(record['user'], record['item'], predicted_rating)
# predicted_rating = round(predicted_rating)
actual_rating = svd.get_matrix().value(item, user)
error = abs(predicted_rating - actual_rating)
errors.append(error)
# Records that raise KeyError are skipped without being counted; presumably
# these are users/items missing from the matrix -- verify.
except KeyError:
continue