本文整理汇总了 Python 中 etl.ETLUtils.select_fields 方法的典型用法代码示例。如果您正苦于以下问题:Python ETLUtils.select_fields 方法的具体用法?Python ETLUtils.select_fields 怎么用?Python ETLUtils.select_fields 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 etl.ETLUtils 的用法示例。
在下文中一共展示了 ETLUtils.select_fields 方法的 7 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: prepare
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import select_fields [as 别名]
def prepare(self):
    """Write the train/test records to CSV and convert them to LibFM files.

    Both ``self.train_records`` and ``self.records_to_predict`` are passed
    through ``ETLUtils.select_fields`` with ``self.headers`` and saved to
    ``self.csv_train_file`` and ``self.csv_test_file``. The two CSV files
    are then handed to ``libfm_converter.csv_to_libfm`` twice, producing
    outputs with the ``.no_context.libfm`` and ``.context.libfm`` suffixes.
    Progress is reported on standard output.
    """
    # NOTE(review): the pattern is year/day/month, which is unusual; it is
    # runtime output, so confirm with the owner before changing it.
    stamp_pattern = "%Y/%d/%m-%H:%M:%S"
    print('prepare: %s' % time.strftime(stamp_pattern))

    train_rows = ETLUtils.select_fields(self.headers, self.train_records)
    test_rows = ETLUtils.select_fields(
        self.headers, self.records_to_predict)
    ETLUtils.save_csv_file(self.csv_train_file, train_rows, self.headers)
    ETLUtils.save_csv_file(self.csv_test_file, test_rows, self.headers)
    print('Exported CSV and JSON files: %s' % time.strftime(stamp_pattern))

    csv_paths = [self.csv_train_file, self.csv_test_file]
    num_cols = len(self.headers)
    print('num_cols', num_cols)

    # Each entry is (fourth converter argument, output suffix).
    # Presumably the fourth argument lists the columns to leave out, which
    # is why columns 3.. go with the "no_context" output -- TODO confirm
    # against libfm_converter.csv_to_libfm.
    conversions = (
        (range(3, num_cols), '.no_context.libfm'),
        ([], '.context.libfm'),
    )
    for fourth_argument, suffix in conversions:
        libfm_converter.csv_to_libfm(
            csv_paths, 0, [1, 2], fourth_argument, ',', has_header=True,
            suffix=suffix)

    print('Exported LibFM files: %s' % time.strftime(stamp_pattern))
示例2: test_select_fields
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import select_fields [as 别名]
def test_select_fields(self):
    """Check ``ETLUtils.select_fields`` against the module's fixtures."""
    # Selecting three fields from the full fixture must give the "short"
    # fixture.
    three_fields = ['user_id', 'offering_id', 'overall_rating']
    shortened = ETLUtils.select_fields(three_fields, reviews_matrix_5)
    self.assertEqual(shortened, reviews_matrix_5_short)

    # Selecting only the user field from the "short" fixture must give the
    # "users" fixture.
    user_only = ['user_id']
    users = ETLUtils.select_fields(user_only, reviews_matrix_5_short)
    self.assertEqual(users, reviews_matrix_5_users)
示例3: full_cycle
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import select_fields [as 别名]
def full_cycle(self, train_records, test_records, train_reviews, test_reviews):
    """Build an LDA-based context model and apply it to both record sets.

    A ``LdaBasedContext`` is created from the training records and reviews
    and stored on ``self.lda_based_context``. Its
    ``find_contextual_topics`` method is applied to the train and test
    records, and both results are then reduced with
    ``ETLUtils.select_fields`` using ``self.headers`` (set up by
    ``self.build_headers()``).

    ``test_reviews`` is accepted but not used by this method.

    :return: a ``(contextual_train_set, contextual_test_set)`` tuple
    """
    # NOTE(review): the pattern is year/day/month -- confirm it is intended.
    stamp_pattern = "%Y/%d/%m-%H:%M:%S"

    topic_model = LdaBasedContext(train_records, train_reviews)
    self.lda_based_context = topic_model
    topic_model.get_context_rich_topics()
    print("Trained LDA Model: %s" % time.strftime(stamp_pattern))

    train_with_topics = topic_model.find_contextual_topics(train_records)
    test_with_topics = topic_model.find_contextual_topics(test_records)
    print("contextual test set size: %d" % len(test_with_topics))

    # The headers must be built before they are used for the selection.
    self.build_headers()
    train_selected = ETLUtils.select_fields(self.headers, train_with_topics)
    test_selected = ETLUtils.select_fields(self.headers, test_with_topics)
    print("Exported contextual topics: %s" % time.strftime(stamp_pattern))

    return train_selected, test_selected
示例4: load_data
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import select_fields [as 别名]
def load_data(json_file):
    """Load records from a JSON file and rename two of their fields.

    Only the ``user_id``, ``business_id`` and ``stars`` fields are selected.
    Each record then has ``stars`` renamed to ``overall_rating`` and
    ``business_id`` renamed to ``offering_id``.

    :param json_file: path passed to ``ETLUtils.load_json_file``
    :return: the records returned by ``ETLUtils.select_fields``, after the
        renaming
    """
    wanted = ['user_id', 'business_id', 'stars']
    records = ETLUtils.select_fields(
        wanted, ETLUtils.load_json_file(json_file))

    # The renaming lets callers use
    # extractor.get_user_average_overall_rating, which expects the
    # 'overall_rating' field name.
    renames = (('stars', 'overall_rating'), ('business_id', 'offering_id'))
    for record in records:
        for old_key, new_key in renames:
            record[new_key] = record.pop(old_key)

    return records
示例5: main_converter
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import select_fields [as 别名]
def main_converter():
    """Export the train and prediction records to CSV, then to LibFM.

    Records are loaded from ``TRAIN_RECORDS_FILE`` and
    ``RECORDS_TO_PREDICT_FILE``, reduced to the ``stars``, ``user_id`` and
    ``business_id`` fields, and written as CSV files under
    ``GENERATED_FOLDER``. Both CSV files are finally passed to
    ``csv_to_libfm``.
    """
    train_csv = (
        GENERATED_FOLDER + 'yelp_training_set_review_' + DATASET +
        's_shuffled_train.csv')
    test_csv = GENERATED_FOLDER + 'records_to_predict_' + DATASET + '.csv'
    columns = ['stars', 'user_id', 'business_id']

    # Load and select both sets before writing anything, so a failure while
    # reading leaves no partial CSV output behind.
    train_rows = ETLUtils.load_json_file(TRAIN_RECORDS_FILE)
    predict_rows = ETLUtils.load_json_file(RECORDS_TO_PREDICT_FILE)
    train_rows = ETLUtils.select_fields(columns, train_rows)
    predict_rows = ETLUtils.select_fields(columns, predict_rows)

    ETLUtils.save_csv_file(train_csv, train_rows, columns)
    ETLUtils.save_csv_file(test_csv, predict_rows, columns)

    csv_to_libfm([train_csv, test_csv], 0, [1, 2], [], ',', has_header=True)
示例6: pre_process_reviews
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import select_fields [as 别名]
def pre_process_reviews(reviews_file=None):
    """
    Returns a list of preprocessed reviews, where the reviews have been
    filtered to obtain only relevant data, have dropped any fields that are
    not useful, and also have additional fields that are handy to make
    calculations

    The steps are: load the JSON file, keep the ``user_id``,
    ``business_id`` and ``stars`` fields, run ``extract_fields``, drop
    ``business_id`` and ``stars``, and finally run ``clean_reviews``.

    :param reviews_file: path of the reviews JSON file. When omitted, the
        original hard-coded location of the Yelp Phoenix academic dataset
        is used, so existing zero-argument calls keep working.
    :return: a list of preprocessed reviews (the value returned by
        ``clean_reviews``)
    """
    if reviews_file is None:
        # Developer-machine default kept for backward compatibility; pass
        # ``reviews_file`` explicitly on any other machine.
        reviews_file = '/Users/fpena/UCC/Thesis/datasets/yelp_phoenix_academic_dataset/yelp_academic_dataset_review.json'

    wanted_fields = ['user_id', 'business_id', 'stars']
    reviews = ETLUtils.load_json_file(reviews_file)
    reviews = ETLUtils.select_fields(wanted_fields, reviews)

    # Both calls below discard their return value, so they presumably
    # modify ``reviews`` in place -- TODO confirm.
    extract_fields(reviews)
    ETLUtils.drop_fields(['business_id', 'stars'], reviews)

    return clean_reviews(reviews)
示例7: pre_process_reviews
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import select_fields [as 别名]
def pre_process_reviews(data_folder=None):
    """
    Returns a list of preprocessed reviews, where the reviews have been
    filtered to obtain only relevant data, have dropped any fields that are
    not useful, and also have additional fields that are handy to make
    calculations

    The steps are: load ``review.txt`` from the data folder as JSON, keep
    the ``ratings``, ``author`` and ``offering_id`` fields, run
    ``extract_fields``, drop ``author`` and ``ratings``, and finally run
    ``clean_reviews``.

    :param data_folder: folder containing ``review.txt``; it is joined by
        plain string concatenation, so it must end with a path separator.
        When omitted, the original hard-coded TripAdvisor Four-City folder
        is used, so existing zero-argument calls keep working.
    :return: a list of preprocessed reviews (the value returned by
        ``clean_reviews``)
    """
    if data_folder is None:
        # Developer-machine default kept for backward compatibility; pass
        # ``data_folder`` explicitly on any other machine.
        data_folder = '/Users/fpena/UCC/Thesis/datasets/TripAdvisor/Four-City/'
    review_file_path = data_folder + 'review.txt'

    wanted_fields = ['ratings', 'author', 'offering_id']
    reviews = ETLUtils.load_json_file(review_file_path)
    reviews = ETLUtils.select_fields(wanted_fields, reviews)

    # Both calls below discard their return value, so they presumably
    # modify ``reviews`` in place -- TODO confirm.
    extract_fields(reviews)
    ETLUtils.drop_fields(['author', 'ratings'], reviews)

    return clean_reviews(reviews)