当前位置: 首页>>代码示例>>Python>>正文


Python ETLUtils.save_json_file方法代码示例

本文整理汇总了Python中etl.ETLUtils.save_json_file方法的典型用法代码示例。如果您正苦于以下问题:Python ETLUtils.save_json_file方法的具体用法?Python ETLUtils.save_json_file怎么用?Python ETLUtils.save_json_file使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在etl.ETLUtils的用法示例。


在下文中一共展示了ETLUtils.save_json_file方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: export_records_to_predict

# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import save_json_file [as 别名]
 def export_records_to_predict(self, records_file):
     """Save the records to predict as JSON and the items to predict as a
     pickle written next to it.

     The records are taken from ``self.records_to_predict``; when that
     attribute is still ``None`` it is first filled with the result of
     ``self.get_records_to_predict()``.

     :param records_file: path passed to ``ETLUtils.save_json_file``. The
         pickle with ``self.items_to_predict`` goes to the same path with
         ``'.pkl'`` appended.
     """
     if self.records_to_predict is None:
         self.records_to_predict = self.get_records_to_predict()
     records = self.records_to_predict
     ETLUtils.save_json_file(records_file, records)

     pickle_path = records_file + '.pkl'
     with open(pickle_path, 'wb') as pickle_file:
         pickle.dump(
             self.items_to_predict, pickle_file, pickle.HIGHEST_PROTOCOL)
开发者ID:melqkiades,项目名称:yelp,代码行数:9,代码来源:top_n_evaluator.py

示例2: export_records

# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import save_json_file [as 别名]
 def export_records(self):
     """Save the dictionary and two snapshots of ``self.records``.

     The records are written once to
     ``Constants.FULL_PROCESSED_RECORDS_FILE``, then
     ``self.drop_unnecessary_fields()`` is called and they are written
     again to ``Constants.PROCESSED_RECORDS_FILE``.
     """
     timestamp = time.strftime("%Y/%m/%d-%H:%M:%S")
     print('%s: get_records_to_predict_topn records' % timestamp)

     self.dictionary.save(Constants.DICTIONARY_FILE)

     # First snapshot: taken before drop_unnecessary_fields() runs.
     full_path = Constants.FULL_PROCESSED_RECORDS_FILE
     ETLUtils.save_json_file(full_path, self.records)

     # Second snapshot: taken after drop_unnecessary_fields() has run.
     self.drop_unnecessary_fields()
     processed_path = Constants.PROCESSED_RECORDS_FILE
     ETLUtils.save_json_file(processed_path, self.records)
开发者ID:neostoic,项目名称:yelp-1,代码行数:9,代码来源:yelp_reviews_preprocessor.py

示例3: main

# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import save_json_file [as 别名]
def main():
    """Train a ``ReviewsClassifier`` and use it to label the shuffled reviews.

    The training records (JSON) and training reviews (pickle) are read from
    the ``classified_<dataset>_reviews`` files, the classifier is trained on
    them, and the shuffled input records are then passed through
    ``label_json_reviews`` and saved to the ``..._shuffled_tagged.json`` file.
    All paths live under a hard-coded local folder.
    """
    # dataset = 'hotel'
    dataset = 'restaurant'
    my_folder = '/Users/fpena/UCC/Thesis/datasets/context/'

    # The training records and reviews share a name and differ only in the
    # extension.
    training_prefix = my_folder + 'classified_' + dataset + '_reviews'
    my_training_records = ETLUtils.load_json_file(training_prefix + '.json')
    with open(training_prefix + '.pkl', 'rb') as training_pickle:
        my_training_reviews = pickle.load(training_pickle)

    classifier = ReviewsClassifier()
    classifier.train(my_training_records, my_training_reviews)

    # The input and output record files share everything up to '_shuffled'.
    shuffled_prefix = \
        my_folder + 'yelp_training_set_review_' + dataset + 's_shuffled'
    my_input_records_file = shuffled_prefix + '.json'
    my_input_reviews_file = my_folder + 'reviews_' + dataset + '_shuffled.pkl'
    my_output_records_file = shuffled_prefix + '_tagged.json'

    with open(my_input_reviews_file, 'rb') as input_pickle:
        my_input_reviews = pickle.load(input_pickle)
    my_input_records = ETLUtils.load_json_file(my_input_records_file)

    my_output_records = classifier.label_json_reviews(
        my_input_records, my_input_reviews)
    ETLUtils.save_json_file(my_output_records_file, my_output_records)
开发者ID:bachlog,项目名称:yelp,代码行数:35,代码来源:reviews_classifier.py

示例4: test

# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import save_json_file [as 别名]
def test():
    """Compute the divergence of the topic model for 2 to 60 topics.

    For every number of topics the ``Constants`` properties are updated, the
    trained data of an ``NmfTopicExtractor`` is loaded and
    ``calculate_divergence`` is evaluated against the document-term matrix.
    The per-run results are printed and finally written to
    ``<RESULTS_FOLDER><ITEM_TYPE>_topic_model_divergence`` as both ``.csv``
    and ``.json``.
    """
    document_term_matrix = NmfTopicExtractor.load_document_term_matrix()
    results = []

    for num_topics in range(2, 61):
        Constants.update_properties(
            {Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: num_topics})
        extractor = NmfTopicExtractor()
        extractor.load_trained_data()

        divergence = calculate_divergence(
            document_term_matrix,
            extractor.document_topic_matrix,
            extractor.topic_term_matrix)

        # The number of topics is read back from Constants (not from the
        # loop variable), exactly as it was configured above.
        results.append({
            'num_topics': Constants.TOPIC_MODEL_NUM_TOPICS,
            'divergence': divergence,
            Constants.TOPIC_MODEL_TYPE_FIELD: 'ensemble',
            Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE
        })

        print('Num topics: %d, Divergence: %f' %
              (Constants.TOPIC_MODEL_NUM_TOPICS, divergence))

    # Compact summary of all runs, one line per number of topics.
    for entry in results:
        print('%d %f' % (entry['num_topics'], entry['divergence']))

    prefix = Constants.RESULTS_FOLDER + Constants.ITEM_TYPE +\
        '_topic_model_divergence'
    headers = sorted(results[0].keys())
    ETLUtils.save_csv_file(prefix + '.csv', results, headers)
    ETLUtils.save_json_file(prefix + '.json', results)
开发者ID:melqkiades,项目名称:yelp,代码行数:43,代码来源:topic_model_divergence.py

示例5: export_records

# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import save_json_file [as 别名]
    def export_records(self):
        """Save a reduced copy of ``self.records`` as JSON.

        Each exported record keeps only the user integer id, item integer
        id, rating and context fields (as named in ``Constants``). The
        output path is produced by ``Constants.generate_file_name``.
        """
        timestamp = time.strftime("%Y/%m/%d-%H:%M:%S")
        print('%s: exporting transformed records' % timestamp)

        desired_fields = (
            Constants.USER_INTEGER_ID_FIELD,
            Constants.ITEM_INTEGER_ID_FIELD,
            Constants.RATING_FIELD,
            Constants.CONTEXT_FIELD,
        )

        # Build a new dict per record so self.records is left untouched.
        records_to_export = [
            {field: record[field] for field in desired_fields}
            for record in self.records
        ]

        file_name = Constants.generate_file_name(
            'recsys_formatted_context_records', 'json', Constants.CACHE_FOLDER,
            None, None, True, True, uses_carskit=False, normalize_topics=True,
            format_context=True)
        ETLUtils.save_json_file(file_name, records_to_export)
开发者ID:melqkiades,项目名称:yelp,代码行数:23,代码来源:context_transformer.py

示例6: load_all_reviews

# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import save_json_file [as 别名]
def load_all_reviews():
    """Load the reviews of every city file and save them, raw and cleaned.

    The reviews of the six city XML files are loaded with ``load_reviews``
    and concatenated. The combined list is saved to ``all_reviews.json``;
    the result of ``clean_reviews`` on it is saved to
    ``cleaned_reviews.json``. Both files go to the same hard-coded folder
    the XML files are read from.

    :return: the combined list of reviews that was passed to
        ``clean_reviews`` (not the value ``clean_reviews`` returned).
    """
    folder = '/Users/fpena/UCC/Thesis/datasets/TripHotelReviewXml/'
    city_file_names = (
        'Chicago_review.xml',
        'Dublin_review.xml',
        'Hong kong_review.xml',
        'London_review.xml',
        'New York_review.xml',
        'Singapore_review.xml',
    )

    all_reviews = [
        review
        for file_name in city_file_names
        for review in load_reviews(folder + file_name)
    ]
    ETLUtils.save_json_file(folder + 'all_reviews.json', all_reviews)

    cleaned_reviews = clean_reviews(all_reviews)
    ETLUtils.save_json_file(folder + 'cleaned_reviews.json', cleaned_reviews)

    return all_reviews
开发者ID:antoine-tran,项目名称:yelp,代码行数:24,代码来源:ruihai_extractor.py

示例7: label_json_reviews

# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import save_json_file [as 别名]
    def label_json_reviews(self, input_file, output_file, reviews=None):
        """Label every record of a JSON file and save the labeled records.

        Each record receives a ``"predicted_class"`` entry that is
        ``"specific"`` when the matching value returned by ``self.predict``
        is truthy and ``"generic"`` otherwise.

        :param input_file: path of the JSON file with the records to label.
        :param output_file: path the labeled records are saved to.
        :param reviews: reviews aligned one-to-one with the records. When
            ``None``, a ``Review`` is built from the ``"text"`` of every
            record.
        :raises ValueError: if the number of records and reviews differ.
        """
        records = ETLUtils.load_json_file(input_file)

        if reviews is None:
            reviews = [Review(record["text"]) for record in records]

        if len(records) != len(reviews):
            msg = "The size of the records and reviews arrays must be the same"
            raise ValueError(msg)

        predicted_classes = self.predict(reviews)
        for record, predicted_class in zip(records, predicted_classes):
            record["predicted_class"] = \
                "specific" if predicted_class else "generic"

        ETLUtils.save_json_file(output_file, records)
开发者ID:anuragreddygv323,项目名称:yelp,代码行数:25,代码来源:reviews_classifier.py

示例8: extract_fields

# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import save_json_file [as 别名]
    return reviews

def extract_fields(reviews):
    """
    Adds two alias fields to every review, modifying the reviews in place:
    ``offering_id`` receives the value of ``business_id`` and
    ``overall_rating`` receives the value of ``stars``. For instance, a
    review in the form
    {'business_id': 'B1', 'stars': 4}
    would become:

    {'business_id': 'B1', 'stars': 4,
    'offering_id': 'B1', 'overall_rating': 4}

    Nothing is returned; the original fields are kept.

    :param reviews: a list of reviews (dictionaries) that contain the
    ``business_id`` and ``stars`` fields.
    """
    # (new field, existing field it is copied from)
    aliases = (
        ('offering_id', 'business_id'),
        ('overall_rating', 'stars'),
    )

    for review in reviews:
        for target_field, source_field in aliases:
            review[target_field] = review[source_field]


# Script section: pre-process the reviews and save them to a local JSON file.
my_reviews = pre_process_reviews()
filtered_reviews_file = (
    '/Users/fpena/UCC/Thesis/datasets/yelp_phoenix_academic_dataset/'
    'filtered_reviews.json')
ETLUtils.save_json_file(filtered_reviews_file, my_reviews)

# Print the first two reviews as a quick visual check of the result.
# print('Num reviews', len(my_reviews))
print(my_reviews[0])
print(my_reviews[1])
开发者ID:antoine-tran,项目名称:yelp,代码行数:33,代码来源:yelp_phoenix_extractor.py

示例9: transform_manually_labeled_reviews

# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import save_json_file [as 别名]
def transform_manually_labeled_reviews():
    """Export the first sentence of every manually labeled review, enriched
    with the identifiers and rating of the review it was taken from.

    Steps:

    1. Load the full tagged reviews and the manually classified records.
    2. Keep only the classified records whose ``sentence_index`` is not
       greater than zero and copy their ``sentence_type`` into
       ``predicted_class``.
    3. Walk the full reviews once, in order, and attach the user id, item
       id, review id and rating of the first not-yet-consumed review whose
       text starts with the sentence. This relies on both files listing
       the reviews in the same relative order.
    4. Print each sentence next to the text of the review it was matched
       with, for manual inspection.
    5. Save the enriched records to
       ``classified_<ITEM_TYPE>_reviews_first_sentences.json`` inside
       ``Constants.DATASET_FOLDER``.

    :raises IndexError: if a labeled sentence cannot be matched with any of
        the remaining full reviews.
    """
    full_records = ETLUtils.load_json_file(
        Constants.DATASET_FOLDER +
        'yelp_training_set_review_restaurants_shuffled_tagged.json')

    records = ETLUtils.load_json_file(Constants.CLASSIFIED_RECORDS_FILE)
    print('total records: %d' % len(records))

    new_records = []
    for record in records:
        if record['sentence_index'] > 0:
            continue
        record['predicted_class'] = record['sentence_type']
        new_records.append(record)

    # Single forward pass over full_records: ``index`` is never reset, so
    # every full review is inspected at most once.
    index = 0
    num_full_records = len(full_records)

    for new_record in new_records:

        while index < num_full_records:
            full_record = full_records[index]
            index += 1

            if full_record['text'].startswith(new_record['text']):
                new_record[Constants.USER_ID_FIELD] = full_record['user_id']
                new_record[Constants.ITEM_ID_FIELD] = full_record['business_id']
                new_record[Constants.REVIEW_ID_FIELD] = full_record['review_id']
                new_record[Constants.RATING_FIELD] = full_record['stars']
                break
        else:
            # The scan ran off the end of full_records without a match.
            # Keep the exception type of the original out-of-range access,
            # but say which sentence could not be matched.
            raise IndexError(
                'no remaining full review starts with the labeled '
                'sentence: %r' % new_record['text'])

    print('index: %d' % index)

    # Index the full reviews by id once (first occurrence wins) instead of
    # rescanning the whole list for every labeled record.
    full_records_by_id = {}
    for full_record in full_records:
        full_records_by_id.setdefault(full_record['review_id'], full_record)

    for new_record in new_records:
        # Read the id back through the same key it was stored under above.
        full_record = full_records_by_id.get(
            new_record[Constants.REVIEW_ID_FIELD])
        if full_record is None:
            continue
        print('%s ====' % new_record['text'])
        print(full_record['text'])
        print('******************\n******************\n******************\n******************')

    new_classified_records_file = Constants.DATASET_FOLDER + 'classified_' + \
        Constants.ITEM_TYPE + '_reviews_first_sentences.json'

    # Sample output; skipped when nothing was selected so that an empty
    # input does not crash right before the export.
    if new_records:
        print(new_records[0])

    ETLUtils.save_json_file(new_classified_records_file, new_records)
开发者ID:melqkiades,项目名称:yelp,代码行数:75,代码来源:main.py

示例10: sort_records

# 需要导入模块: from etl import ETLUtils [as 别名]
# 或者: from etl.ETLUtils import save_json_file [as 别名]
            if review['business_id'] in business_ids:
                filtered_reviews.append(review)

        return filtered_reviews

    @staticmethod
    def sort_records(records, field, reverse=False):
        return sorted(records, key=itemgetter(field), reverse=reverse)



# Script section: filter the training reviews down to the businesses returned
# for 'Hotels', save them to their own JSON file and report the elapsed time.
start = time.time()

review_etl = ReviewETL()
my_business_file = (
    "/Users/fpena/tmp/yelp_training_set/"
    "yelp_training_set_business.json")
my_reviews_file = (
    "/Users/fpena/tmp/yelp_training_set/"
    "yelp_training_set_review.json")
my_business_ids = BusinessETL.get_business_ids(my_business_file, 'Hotels')
my_reviews = ETLUtils.load_json_file(my_reviews_file)
# print(len(ReviewETL.filter_reviews_by_business(my_reviews, my_business_ids, 'text')))

# NOTE: despite the "restaurant" in the variable names below, the business
# ids were requested with 'Hotels' and the output file is named *_hotels.json.
my_restaurant_reviews = ReviewETL.filter_reviews_by_business_slow(
    my_reviews, my_business_ids)
my_restaurants_file = (
    "/Users/fpena/tmp/yelp_training_set/"
    "yelp_training_set_review_hotels.json")
ETLUtils.save_json_file(my_restaurants_file, my_restaurant_reviews)
# my_sorted_reviews = ReviewETL.sort_records(my_reviews, 'business_id')
# print(len(my_sorted_reviews))


# main()
end = time.time()
total_time = end - start
print("Total time = %f seconds" % total_time)
开发者ID:antoine-tran,项目名称:yelp,代码行数:32,代码来源:review_etl.py


注:本文中的etl.ETLUtils.save_json_file方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。