本文整理汇总了Python中etl.ETLUtils.drop_fields方法的典型用法代码示例。如果您正苦于以下问题:Python ETLUtils.drop_fields方法的具体用法?Python ETLUtils.drop_fields怎么用?Python ETLUtils.drop_fields使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类etl.ETLUtils的用法示例。
在下文中一共展示了ETLUtils.drop_fields方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: multiple_lineal_regression
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import drop_fields [as 别名]
def multiple_lineal_regression(file_path):
    """
    Fits a linear regression model that predicts the 'stars' field of each
    record from the record's remaining fields, then prints the RMSE measured
    on the training data and the RMSE measured with 10-fold cross-validation

    :rtype : void
    :param file_path: the path of the file handed to ReviewETL.load_file
    """
    records = ReviewETL.load_file(file_path)
    ratings = np.array([record['stars'] for record in records])
    ETLUtils.drop_fields(['stars'], records)
    # list() is needed on Python 3, where dict.values() is a view object that
    # NumPy would store as an opaque object instead of a row of values.
    # Assumes every record has the same keys in the same order -- TODO confirm
    data = np.array([list(record.values()) for record in records])

    model = linear_model.LinearRegression(fit_intercept=True)
    model.fit(data, ratings)
    # Predict every sample in one call: predict() works on a 2-D array of
    # samples, so there is no need to feed it one 1-D row at a time
    p = model.predict(data)
    e = p - ratings
    total_error = np.dot(e, e)
    rmse_train = np.sqrt(total_error / len(p))

    # NOTE(review): KFold(n, n_folds=...) looks like the legacy
    # sklearn.cross_validation signature -- confirm against the file's import
    kf = KFold(len(data), n_folds=10)
    err = 0
    for train, test in kf:
        model.fit(data[train], ratings[train])
        p = model.predict(data[test])
        e = p - ratings[test]
        err += np.dot(e, e)
    # Every sample is in exactly one test fold, hence the division by len(data)
    rmse_10cv = np.sqrt(err / len(data))

    print('RMSE on training: {}'.format(rmse_train))
    print('RMSE on 10-fold CV: {}'.format(rmse_10cv))
示例2: drop_unwanted_fields
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import drop_fields [as 别名]
def drop_unwanted_fields(dictionary_list):
    """
    Removes from the business data set the fields that are of no use for
    data analysis, by delegating to ETLUtils.drop_fields

    :rtype : void
    :param dictionary_list: the list of dictionaries containing the data
    """
    # Order is kept exactly as it is passed to ETLUtils.drop_fields
    fields_to_remove = [
        'attributes', 'business_id', 'categories',
        'city', 'full_address',
        'latitude', 'longitude',
        'hours', 'name', 'neighborhoods', 'open',
        'review_count', 'stars',
        'state', 'type',
    ]
    ETLUtils.drop_fields(fields_to_remove, dictionary_list)
示例3: drop_unnecessary_fields
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import drop_fields [as 别名]
def drop_unnecessary_fields(self):
    """
    Logs a timestamped progress message and then asks ETLUtils.drop_fields
    to drop the text, POS-tags, votes and bag-of-words fields (as named by
    Constants) from self.records
    """
    timestamp = time.strftime("%Y/%m/%d-%H:%M:%S")
    print('%s: drop unnecessary fields' % timestamp)

    obsolete_fields = [
        Constants.TEXT_FIELD, Constants.POS_TAGS_FIELD,
        Constants.VOTES_FIELD, Constants.BOW_FIELD,
    ]
    ETLUtils.drop_fields(obsolete_fields, self.records)
示例4: test_drop_fields
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import drop_fields [as 别名]
def test_drop_fields(self):
    """
    Checks that, after ETLUtils.drop_fields removes the five rating fields
    listed below, a copy of reviews_matrix_5 equals reviews_matrix_5_short
    """
    import copy

    drop_fields = [
        'cleanliness_rating',
        'location_rating',
        'rooms_rating',
        'service_rating',
        'value_rating'
    ]

    # Deep copy instead of list(): a shallow copy shares its elements with
    # the reviews_matrix_5 fixture, so a drop_fields that edits the records
    # in place (as its call sites suggest) would alter the fixture for every
    # other test that uses it
    test_list = copy.deepcopy(list(reviews_matrix_5))
    ETLUtils.drop_fields(drop_fields, test_list)
    self.assertEqual(reviews_matrix_5_short, test_list)

    # NOTE(review): this check compares a fresh copy with its own source and
    # never calls drop_fields, so it cannot fail -- a call may be missing here
    test_list = list(reviews_matrix_5_short)
    self.assertEqual(reviews_matrix_5_short, test_list)
示例5: pre_process_reviews
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import drop_fields [as 别名]
def pre_process_reviews(reviews_file=None):
    """
    Returns a list of preprocessed reviews, where the reviews have been filtered
    to obtain only relevant data, have dropped any fields that are not useful,
    and also have additional fields that are handy to make calculations

    :param reviews_file: path of the JSON reviews file to load; when omitted,
    the Yelp Phoenix academic dataset path this function has always used is
    taken, so existing callers are unaffected
    :return: a list of preprocessed reviews
    """
    if reviews_file is None:
        reviews_file = '/Users/fpena/UCC/Thesis/datasets/yelp_phoenix_academic_dataset/yelp_academic_dataset_review.json'

    reviews = ETLUtils.load_json_file(reviews_file)
    select_fields = ['user_id', 'business_id', 'stars']
    reviews = ETLUtils.select_fields(select_fields, reviews)
    # extract_fields is defined elsewhere; presumably it derives the extra
    # fields from 'business_id' and 'stars' before they are dropped below --
    # TODO confirm
    extract_fields(reviews)
    ETLUtils.drop_fields(['business_id', 'stars'], reviews)
    reviews = clean_reviews(reviews)
    return reviews
示例6: pre_process_reviews
# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import drop_fields [as 别名]
def pre_process_reviews(review_file_path=None):
    """
    Returns a list of preprocessed reviews, where the reviews have been filtered
    to obtain only relevant data, have dropped any fields that are not useful,
    and also have additional fields that are handy to make calculations

    :param review_file_path: path of the JSON reviews file to load; when
    omitted, 'review.txt' inside the TripAdvisor Four-City folder this
    function has always used is taken, so existing callers are unaffected
    :return: a list of preprocessed reviews
    """
    if review_file_path is None:
        data_folder = '/Users/fpena/UCC/Thesis/datasets/TripAdvisor/Four-City/'
        review_file_path = data_folder + 'review.txt'

    reviews = ETLUtils.load_json_file(review_file_path)
    select_fields = ['ratings', 'author', 'offering_id']
    reviews = ETLUtils.select_fields(select_fields, reviews)
    # extract_fields is defined elsewhere; presumably it derives the extra
    # fields from 'author' and 'ratings' before they are dropped below --
    # TODO confirm
    extract_fields(reviews)
    ETLUtils.drop_fields(['author', 'ratings'], reviews)
    reviews = clean_reviews(reviews)
    return reviews