当前位置: 首页>>代码示例>>Python>>正文


Python DataModel.generate_word_probabilites方法代码示例

本文整理汇总了Python中DataModel.generate_word_probabilites方法的典型用法代码示例。如果您正苦于以下问题:Python DataModel.generate_word_probabilites方法的具体用法?Python DataModel.generate_word_probabilites怎么用?Python DataModel.generate_word_probabilites使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在DataModel的用法示例。


在下文中一共展示了DataModel.generate_word_probabilites方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: NaiveBayesClassifier

# 需要导入模块: import DataModel [as 别名]
# 或者: from DataModel import generate_word_probabilites [as 别名]
class NaiveBayesClassifier(object):
    def __init__(self, training_file, testing_file):
        super(NaiveBayesClassifier, self).__init__()
        self.training_file = training_file
        self.testing_file = testing_file
        self.model = None
        self.time_to_train_string = ""
        self.time_to_label_string = ""
        self.classification_accuracy_string = ""
        self.training_accuracy_string = ""

    def train(self):
        start = time.clock()
        reviews = open(self.training_file, "r")
        word_count = [[], []]
        # Populate word_count array with 2 arrays, word_count[0] is for the 0 reviews and word_count[1] is
        # for the 1 reviews. Each element of those arrays contain a dictionary for the word count for a specific review
        ReviewParser.parse_for_training(reviews, word_count)

        self.create_model(word_count)
        end = time.clock()
        self.time_to_train_string = str(int(end - start)) + " seconds (training)"

    def classify(self):
        start = time.clock()
        reviews = open(self.testing_file, "r")
        parsed_reviews = ReviewParser.parse_for_labeling(reviews)
        classified_reviews = self.classify_reviews(parsed_reviews)
        # This method will print the labels followed by a newline
        self.get_classification_accuracy(classified_reviews)
        self.run_classification_on_training()
        end = time.clock()
        self.time_to_label_string = str(int(end - start)) + " seconds (labeling)"

        # Print all the required timing and accuracy
        print(self.time_to_train_string)
        print(self.time_to_label_string)
        print(self.training_accuracy_string)
        print(self.classification_accuracy_string)

    # Helper Methods

    # Training
    def create_model(self, review_word_counts):
        self.model = DataModel(len(review_word_counts[1]), len(review_word_counts[0]))
        self.model.generate_word_probabilites(review_word_counts[1], review_word_counts[0])

    # Classification
    def classify_reviews(self, parsed_reviews):
        classified_reviews = []
        for review_info in parsed_reviews:
            prob_neg = math.log10(self.model.probability_is_negative_review)
            prob_pos = math.log10(self.model.probability_is_positive_review)
            # Use the sum of logs to deal with really small numbers log(P(C=c)) + sum_i(P(X=x_i|C=c))
            for word in review_info["words"][1:]:
                if word in self.model.word_probabilites["negative"]:
                    conditional_neg = self.model.word_probabilites["negative"][word]
                    if conditional_neg > 0:
                        prob_neg += math.log10(conditional_neg)
                if word in self.model.word_probabilites["positive"]:
                    conditional_pos = self.model.word_probabilites["positive"][word]
                    if conditional_pos > 0:
                        prob_pos += math.log10(conditional_pos)
            classified_reviews.append(
                {
                    "real_category" : int(review_info["real_category"]),
                    "calculated_category" : 1 if prob_pos > prob_neg else 0
                }
            )
        return classified_reviews

    def get_classification_accuracy(self, classified_reviews):
        num_correct = 0
        total = 0
        for review_classification in classified_reviews:
            total += 1
            print(review_classification["calculated_category"])
            if review_classification["real_category"] == review_classification["calculated_category"]:
                num_correct += 1

        classification_accuracy = float(num_correct) / float(total)
        self.classification_accuracy_string = str(classification_accuracy) + " (testing)"

    def get_training_accuracy(self, classified_reviews):
        num_correct = 0
        total = 0
        for review_classification in classified_reviews:
            total += 1
            if review_classification["real_category"] == review_classification["calculated_category"]:
                num_correct += 1

        training_accuracy = float(num_correct) / float(total)
        self.training_accuracy_string = str(training_accuracy) + " (training)"

    def run_classification_on_training(self):
        reviews = open(self.training_file, "r")
        parsed_reviews = ReviewParser.parse_for_labeling(reviews)
        classified_reviews = self.classify_reviews(parsed_reviews)
        self.get_training_accuracy(classified_reviews)
开发者ID:spencerprescott,项目名称:NaiveBayes,代码行数:101,代码来源:NaiveBayesClassifier.py


注:本文中的DataModel.generate_word_probabilites方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。