当前位置: 首页>>代码示例>>Python>>正文


Python NaiveBayesClassifier.update方法代码示例

本文整理汇总了Python中textblob.classifiers.NaiveBayesClassifier.update方法的典型用法代码示例。如果您正苦于以下问题:Python NaiveBayesClassifier.update方法的具体用法?Python NaiveBayesClassifier.update怎么用?Python NaiveBayesClassifier.update使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在textblob.classifiers.NaiveBayesClassifier的用法示例。


在下文中一共展示了NaiveBayesClassifier.update方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: TestValidators

# 需要导入模块: from textblob.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from textblob.classifiers.NaiveBayesClassifier import update [as 别名]
class TestValidators(TestCase):
    """Exercise ``ClassifierValidator`` against a small two-label classifier."""

    def setUp(self):
        """Train a spam/valid classifier and patch the validator to use it."""
        self.data = StringIO('{}')
        self.classifier = NaiveBayesClassifier(self.data, format='json')
        self.classifier.update([
            ('spam spam spam', 'spam'),
            ('this is not spam', 'valid'),
        ])

        self.mock_classifier_get = mock.patch.object(
            ClassifierValidator,
            'get_classifier',
            mock.Mock(return_value=self.classifier)
        )
        self.patch_classifier_get = self.mock_classifier_get.start()
        # Undo the patch after every test so the mocked get_classifier cannot
        # leak into other test cases. stopall() only stops patchers that are
        # still active, so tests that call stop() themselves stay valid.
        self.addCleanup(mock.patch.stopall)

    def test_validator_pass(self):
        # Text that does not resemble the 'spam' sample must validate silently.
        validate = ClassifierValidator()
        validate('this is totally legit')

    def test_validator_invalid(self):
        # Spam-like text is rejected with the default ValidationError.
        validate = ClassifierValidator()
        with self.assertRaises(ValidationError):
            validate('spam spammy spam')

    def test_validator_invalid_different_exception(self):
        # The exception type raised on rejection is configurable via `raises`.
        validate = ClassifierValidator(raises=ValueError)
        with self.assertRaises(ValueError):
            validate('spam spammy spam')

    @mock.patch('textclassifier.classifier.TEXTCLASSIFIER_DATA_FILE', '')
    def test_open_file_failure(self):
        """Open file, but still validate after errors"""
        # This test needs the real get_classifier, so drop the setUp patch.
        self.mock_classifier_get.stop()
        # The builtins module is named differently on Python 2 and Python 3.
        mod_name = '__builtin__' if sys.version_info < (3, 0) else 'builtins'
        with mock.patch('{0}.open'.format(mod_name)) as mocked_open:
            mocked_open.side_effect = IOError
            with self.assertRaises(IOError):
                DefaultClassifier()
            validate = ClassifierValidator()
            validate('spam spam spam')
开发者ID:pombredanne,项目名称:django-textclassifier,代码行数:44,代码来源:test_validators.py

示例2: BankClassify

# 需要导入模块: from textblob.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from textblob.classifiers.NaiveBayesClassifier import update [as 别名]
class BankClassify():

    def __init__(self, data="AllData.csv"):
        """Set up the transaction history and train the classifier on it.

        Arguments:
         - data: path of the CSV file holding previously classified
           transactions; when no such file exists an empty table with the
           date/desc/amount/cat columns is used instead
        """
        history_exists = os.path.exists(data)
        self.prev_data = (
            pd.read_csv(data)
            if history_exists
            else pd.DataFrame(columns=['date', 'desc', 'amount', 'cat'])
        )

        training_rows = self._get_training(self.prev_data)
        self.classifier = NaiveBayesClassifier(training_rows, self._extractor)

    def add_data(self, filename):
        """Read new transactions, classify them interactively and save them.

        The combined history is always written to "AllData.csv", whatever
        path was given to the constructor.

        Arguments:
         - filename: filename of Santander-format file
        """
        freshly_read = self._read_santander_file(filename)
        self.new_data = freshly_read

        # The interactive pass fills in the 'cat' column of the new rows.
        self._ask_with_guess(self.new_data)

        combined = pd.concat([self.prev_data, self.new_data])
        self.prev_data = combined
        self.prev_data.to_csv("AllData.csv", index=False)

    def _prep_for_analysis(self):
        """Prepare data for analysis in pandas, setting index types and subsetting"""
        self.prev_data = self._make_date_index(self.prev_data)

        self.prev_data['cat'] = self.prev_data['cat'].str.strip()

        self.inc = self.prev_data[self.prev_data.amount > 0]
        self.out = self.prev_data[self.prev_data.amount < 0]
        self.out.amount = self.out.amount.abs()

        self.inc_noignore = self.inc[self.inc.cat != 'Ignore']
        self.inc_noexpignore = self.inc[(self.inc.cat != 'Ignore') & (self.inc.cat != 'Expenses')]

        self.out_noignore = self.out[self.out.cat != 'Ignore']
        self.out_noexpignore = self.out[(self.out.cat != 'Ignore') & (self.out.cat != 'Expenses')]

    def _read_categories(self):
        """Read list of categories from categories.txt"""
        categories = {}

        with open('categories.txt') as f:
            for i, line in enumerate(f.readlines()):
                categories[i] = line.strip()

        return categories

    def _add_new_category(self, category):
        """Add a new category to categories.txt"""
        with open('categories.txt', 'a') as f:
            f.write('\n' + category)

    def _ask_with_guess(self, df):
        """Interactively guess categories for each transaction in df, asking each time if the guess
        is correct"""
        # Initialise colorama
        init()

        df['cat'] = ""

        categories = self._read_categories()

        for index, row in df.iterrows():

            # Generate the category numbers table from the list of categories
            cats_list = [[idnum, cat] for idnum, cat in categories.items()]
            cats_table = tabulate(cats_list)

            stripped_text = self._strip_numbers(row['desc'])

            # Guess a category using the classifier (only if there is data in the classifier)
            if len(self.classifier.train_set) > 1:
                guess = self.classifier.classify(stripped_text)
            else:
                guess = ""


            # Print list of categories
            print(chr(27) + "[2J")
            print(cats_table)
            print("\n\n")
            # Print transaction
            print("On: %s\t %.2f\n%s" % (row['date'], row['amount'], row['desc']))
            print(Fore.RED  + Style.BRIGHT + "My guess is: " + str(guess) + Fore.RESET)

            input_value = input("> ")

            if input_value.lower() == 'q':
                # If the input was 'q' then quit
                return df
            if input_value == "":
                # If the input was blank then our guess was right!
                df.ix[index, 'cat'] = guess
                self.classifier.update([(stripped_text, guess)])
            else:
                # Otherwise, our guess was wrong
                try:
#.........这里部分代码省略.........
开发者ID:robintw,项目名称:BankClassify,代码行数:103,代码来源:BankClassify.py

示例3: NaiveBayesClassifier

# 需要导入模块: from textblob.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from textblob.classifiers.NaiveBayesClassifier import update [as 别名]
    ("I can't deal with this", 'neg'),
    ('He is my sworn enemy!', 'neg'),
    ('My boss is horrible.', 'neg')
]
test = [
    ('The beer was good.', 'pos'),
    ('I do not enjoy my job', 'neg'),
    ("I ain't feeling dandy today.", 'neg'),
    ("I feel amazing!", 'pos'),
    ('Gary is a friend of mine.', 'pos'),
    ("I can't believe I'm doing this.", 'neg')
]

# Train the initial classifier on the hand-written sentences
cl = NaiveBayesClassifier(train)

# Grab some movie review data as (list of words, category) pairs
reviews = [
    (list(movie_reviews.words(fileid)), category)
    for category in movie_reviews.categories()
    for fileid in movie_reviews.fileids(category)
]
random.shuffle(reviews)
# Two adjacent, non-overlapping slices of 100 reviews each. The second slice
# starts at index 100 so that no shuffled review is silently skipped.
new_train, new_test = reviews[:100], reviews[100:200]

# Update the classifier with the new training data
cl.update(new_train)

# Compute accuracy over the hand-written and the movie-review test data
accuracy = cl.accuracy(test + new_test)
print("Accuracy: {0}".format(accuracy))

# Show 5 most informative features
cl.show_informative_features(5)
开发者ID:cubecnelson,项目名称:FYP,代码行数:33,代码来源:tweet_classifier.py

示例4: TestNaiveBayesClassifier

# 需要导入模块: from textblob.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from textblob.classifiers.NaiveBayesClassifier import update [as 别名]
class TestNaiveBayesClassifier(unittest.TestCase):

    def setUp(self):
        """Build a fresh classifier from the shared training set."""
        self.classifier = NaiveBayesClassifier(train_set)

    def _check_pos_neg_classifier(self, cl):
        """Assert *cl* labels a happy sentence 'pos' and holds unicode text."""
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        first_sentence = cl.train_set[0][0]
        assert_true(isinstance(first_sentence, unicode))

    def test_default_extractor(self):
        # Without a custom extractor the classifier uses basic_extractor.
        sentence = "I feel happy this morning."
        actual = self.classifier.extract_features(sentence)
        assert_equal(actual, basic_extractor(sentence, train_set))

    def test_classify(self):
        label = self.classifier.classify("I feel happy this morning")
        assert_equal(label, 'positive')
        # Classifying must not change the stored training set.
        assert_equal(len(self.classifier.train_set), len(train_set))

    def test_classify_a_list_of_words(self):
        words = ["I", "feel", "happy", "this", "morning"]
        label = self.classifier.classify(words)
        assert_equal(label, "positive")

    def test_train_from_lists_of_words(self):
        # classifier can be trained on lists of words instead of strings
        tokenized = [(doc.split(), label) for doc, label in train_set]
        from_tokens = NaiveBayesClassifier(tokenized)
        assert_equal(from_tokens.accuracy(test_set),
                     self.classifier.accuracy(test_set))

    def test_prob_classify(self):
        dist = self.classifier.prob_classify("I feel happy this morning")
        assert_equal(dist.max(), "positive")
        assert_true(dist.prob("positive") > dist.prob("negative"))

    def test_accuracy(self):
        score = self.classifier.accuracy(test_set)
        assert_true(isinstance(score, float))

    def test_update(self):
        # One extra 'positive' example must raise that label's probability
        # for the same text and grow the training set by exactly one.
        sample = "lorem ipsum"
        before = self.classifier.prob_classify(sample)
        size_before = len(self.classifier.train_set)
        self.classifier.update([(sample, "positive")])
        size_after = len(self.classifier.train_set)
        after = self.classifier.prob_classify(sample)
        assert_true(after.prob("positive") > before.prob("positive"))
        assert_equal(size_before + 1, size_after)

    def test_labels(self):
        known = self.classifier.labels()
        for expected in ("positive", "negative"):
            assert_true(expected in known)

    def test_show_informative_features(self):
        # Smoke test: the call only has to complete without raising.
        self.classifier.show_informative_features()

    def test_informative_features(self):
        top = self.classifier.informative_features(3)
        assert_true(isinstance(top, list))
        assert_true(isinstance(top[0], tuple))

    def test_custom_feature_extractor(self):
        custom = NaiveBayesClassifier(train_set, custom_extractor)
        custom.classify("Yay! I'm so happy it works.")
        assert_equal(custom.train_features[0][1], 'positive')

    def test_init_with_csv_file(self):
        with open(CSV_FILE) as fp:
            cl = NaiveBayesClassifier(fp, format="csv")
        self._check_pos_neg_classifier(cl)

    def test_init_with_csv_file_without_format_specifier(self):
        with open(CSV_FILE) as fp:
            cl = NaiveBayesClassifier(fp)
        self._check_pos_neg_classifier(cl)

    def test_init_with_json_file(self):
        with open(JSON_FILE) as fp:
            cl = NaiveBayesClassifier(fp, format="json")
        self._check_pos_neg_classifier(cl)

    def test_init_with_json_file_without_format_specifier(self):
        with open(JSON_FILE) as fp:
            cl = NaiveBayesClassifier(fp)
        self._check_pos_neg_classifier(cl)

    def test_init_with_custom_format(self):
        redis_train = [('I like turtles', 'pos'), ('I hate turtles', 'neg')]

        class MockRedisFormat(formats.BaseFormat):
            def __init__(self, client, port):
                self.client = client
                self.port = port

            @classmethod
            def detect(cls, stream):
                return True
#.........这里部分代码省略.........
开发者ID:Anhmike,项目名称:TextBlob,代码行数:103,代码来源:test_classifiers.py

示例5: process_questions

# 需要导入模块: from textblob.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from textblob.classifiers.NaiveBayesClassifier import update [as 别名]
    def process_questions(self):

        self._load_training_data()
        self._pload_processed_tuples()

        five_ws = [ "who", "what", "where", "when", "why" ]

        trigger_phrases = [
            "best practice",
            "best way",
            "simplest way",
            "preferred nomenclature",
            "preferred location",
            " have any recommendation",
            "exact command",
            "documentation",
            " doc for ",
            " doc about ",
            "tutorial",
            "release",
            "external inventory", "inventory file",
            "playbook", "play", "role", "task", "handler",
            "variable", "var",
            "connection", "async", "accelerate",
            "{{", "}}",
            "lookup", "plugin", "callback",
            "hang",
            "conditional", "when:"
            "group",
            "ec2 module", "route53",
            "fault tolerance",
            "public key"
        ]

        cl = NaiveBayesClassifier(self.train)
    
        ks = [ int(x) for x in self.logdata.keys() ]
        sorted_ks = sorted(ks)
        total_ks = sorted_ks[-1]
        for k in sorted_ks:
            k_str = str(k)
            print total_ks,"-",k_str

            this_msg = self.logdata[k_str]['message']
            text_obj = TextBlob(this_msg)

            if hasattr(text_obj, "raw_sentences"):
                for sent in text_obj.sentences:
                    try:
                        str(sent)
                    except UnicodeDecodeError:
                        #self.known_sentences.append(sent)
                        continue

                    if str(sent) in self.processed_tuples:
                        continue

                    if sent.endswith("?") and [ x for x in sent.words if x.lower() in five_ws ]:

                        curr_rating = cl.classify(sent)

                        triggered = False
                        for ph in trigger_phrases:
                            if ph in str(sent):
                                triggered = True                            

                        this_tuple = (k, sent, curr_rating, triggered)
                        self.processed_tuples[str(sent)] = this_tuple
                        #self.known_sentences.append(str(sent))

        # save what we have
        self._pdump_processed_tuples()

        for pt in self.processed_tuples.keys():        
            print "##############################\n"

            #import epdb; epdb.st()
            k = self.processed_tuples[pt][0]
            sent = self.processed_tuples[pt][1]
            curr_rating = self.processed_tuples[pt][2]
            triggered = self.processed_tuples[pt][3]

            print sent
            print "\n"
            print "rating: %s" % curr_rating
            print "triggered: %s" % triggered

            if ( curr_rating == "b" and triggered ) or ( curr_rating == "g" and not triggered ):
                #continue
                q_string = "\n$ g(ood) question or b(ad) question? (default: %s): " % curr_rating
                x = raw_input(q_string)                            
            else:
                x = str(curr_rating)

            print "\n"

            if x == "":
                this_tup = [ (str(sent), curr_rating) ]
                cl.update(this_tup)
                #self.known_sentences.append(str(sent))
#.........这里部分代码省略.........
开发者ID:jctanner,项目名称:civility,代码行数:103,代码来源:question-search-xchat.py

示例6: TestNaiveBayesClassifier

# 需要导入模块: from textblob.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from textblob.classifiers.NaiveBayesClassifier import update [as 别名]
class TestNaiveBayesClassifier(unittest.TestCase):

    def setUp(self):
        """Build a fresh classifier from the shared training set."""
        self.classifier = NaiveBayesClassifier(train_set)

    def _check_pos_neg_classifier(self, cl):
        """Assert *cl* labels a happy sentence 'pos' and holds unicode text."""
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        first_sentence = cl.train_set[0][0]
        assert_true(isinstance(first_sentence, unicode))

    def test_default_extractor(self):
        # Without a custom extractor the classifier uses basic_extractor.
        sentence = "I feel happy this morning."
        actual = self.classifier.extract_features(sentence)
        assert_equal(actual, basic_extractor(sentence, train_set))

    def test_classify(self):
        label = self.classifier.classify("I feel happy this morning")
        assert_equal(label, 'positive')
        # Classifying must not change the stored training set.
        assert_equal(len(self.classifier.train_set), len(train_set))

    def test_classify_a_list_of_words(self):
        words = ["I", "feel", "happy", "this", "morning"]
        label = self.classifier.classify(words)
        assert_equal(label, "positive")

    def test_train_from_lists_of_words(self):
        # classifier can be trained on lists of words instead of strings
        tokenized = [(doc.split(), label) for doc, label in train_set]
        from_tokens = NaiveBayesClassifier(tokenized)
        assert_equal(from_tokens.accuracy(test_set),
                     self.classifier.accuracy(test_set))

    def test_prob_classify(self):
        dist = self.classifier.prob_classify("I feel happy this morning")
        assert_equal(dist.max(), "positive")
        assert_true(dist.prob("positive") > dist.prob("negative"))

    def test_accuracy(self):
        score = self.classifier.accuracy(test_set)
        assert_true(isinstance(score, float))

    def test_update(self):
        # One extra 'positive' example must raise that label's probability
        # for the same text and grow the training set by exactly one.
        sample = "lorem ipsum"
        before = self.classifier.prob_classify(sample)
        size_before = len(self.classifier.train_set)
        self.classifier.update([(sample, "positive")])
        size_after = len(self.classifier.train_set)
        after = self.classifier.prob_classify(sample)
        assert_true(after.prob("positive") > before.prob("positive"))
        assert_equal(size_before + 1, size_after)

    def test_labels(self):
        known = self.classifier.labels()
        for expected in ("positive", "negative"):
            assert_true(expected in known)

    def test_show_informative_features(self):
        # Smoke test: the call only has to complete without raising.
        self.classifier.show_informative_features()

    def test_informative_features(self):
        top = self.classifier.informative_features(3)
        assert_true(isinstance(top, list))
        assert_true(isinstance(top[0], tuple))

    def test_custom_feature_extractor(self):
        custom = NaiveBayesClassifier(train_set, custom_extractor)
        custom.classify("Yay! I'm so happy it works.")
        assert_equal(custom.train_features[0][1], 'positive')

    def test_init_with_csv_file(self):
        cl = NaiveBayesClassifier(CSV_FILE, format="csv")
        self._check_pos_neg_classifier(cl)

    def test_init_with_csv_file_without_format_specifier(self):
        cl = NaiveBayesClassifier(CSV_FILE)
        self._check_pos_neg_classifier(cl)

    def test_init_with_json_file(self):
        cl = NaiveBayesClassifier(JSON_FILE, format="json")
        self._check_pos_neg_classifier(cl)

    def test_init_with_json_file_without_format_specifier(self):
        cl = NaiveBayesClassifier(JSON_FILE)
        self._check_pos_neg_classifier(cl)

    def test_accuracy_on_a_csv_file(self):
        score = self.classifier.accuracy(CSV_FILE)
        assert_true(isinstance(score, float))

    def test_accuracy_on_json_file(self):
        score = self.classifier.accuracy(JSON_FILE)
        assert_true(isinstance(score, float))

    def test_init_with_tsv_file(self):
        cl = NaiveBayesClassifier(TSV_FILE)
        self._check_pos_neg_classifier(cl)

    def test_init_with_bad_format_specifier(self):
#.........这里部分代码省略.........
开发者ID:Arttii,项目名称:TextBlob,代码行数:103,代码来源:test_classifiers.py

示例7:

# 需要导入模块: from textblob.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from textblob.classifiers.NaiveBayesClassifier import update [as 别名]
# Test model with its two labels.
# Every print below is a single-argument call, so the output is the same on
# Python 2 and Python 3 (the original print statements only parse on Python 2).
print(cl.classify(u" احسن علاج هذا"))

# second cl model test
prob_dist = cl.prob_classify(u"ك يوم يا ظالم,")
print(prob_dist.max())
print(prob_dist.prob("positive"))
print(prob_dist.prob("negative"))

# compute the accuracy on our test set
print("accuracy on the test set:{} ".format(cl.accuracy("testing.csv", format="csv")))

# display a listing of the most informative features.
cl.show_informative_features(5)

# add new data
new_data = [(u"كلام صحيح من شان هيك الدول اللي ما فيها بطالة والمجتمعات المفتوحة بتقل فيها المشاكل النفسية", 'positive'),
            (u"لا طبعا التقرب الى الله هو خير علاج للحالات النفسية", 'positive'),
            (u"تفائلوا بالخير تجدوه", 'positive'),
            (u"يا ترى الحكومه بدها تزيد دعم المواطن الي الله يكون في عونه", 'negative')]

# updating classifiers with new data
cl.update(new_data)

# test accuracy after adding new data to the generated model
print("accuracy on the test set:{} ".format(cl.accuracy("testing.csv", format="csv")))



开发者ID:a24ibrah,项目名称:Arabic_Classifier,代码行数:28,代码来源:classifier.py

示例8: selectTweets

# 需要导入模块: from textblob.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from textblob.classifiers.NaiveBayesClassifier import update [as 别名]
                
                # Print success message
    print "> File opened successfully!"
                
    counter = 0
    for row in reader:
        selectTweets(row)
        counter += 1
                    
    print "> Wait a sec for the results..."
                    
    cl = NaiveBayesClassifier(trainTweets)
                
             
    print "> add another data set"
    cl.update(trainFeatures)  
    print "> finish combination"
    cl.show_informative_features(10)


    outputPos=open('positiveTweet.txt','a')
    outputNeg=open('negativeTweet.txt','a')
    dataset = str(raw_input("> Please enter a filename contains tweets: ")) 
    with open(dataset) as f:
         out = f.readlines()   
         for lines in out:
            tweetWords = []
            words = lines.split()
            for i in words:
                i = i.lower()
                i = i.strip('@#\'"?,.!')
开发者ID:Aditya-Shibrady,项目名称:Sentimental-Analysis-Engine-using-Python,代码行数:33,代码来源:SentimentalAnalysis.py

示例9: Text

# 需要导入模块: from textblob.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from textblob.classifiers.NaiveBayesClassifier import update [as 别名]
# Classifying Text (call the classify(text) method to use the classifier).
# Every print below is a single-argument call, so the output is the same on
# Python 2 and Python 3 (the original print statements only parse on Python 2).
test_check = cl.classify("This is an amazing library!")
print(test_check)

# You can get the label probability distribution with the prob_classify(text) method.

prob_dist = cl.prob_classify("This one's a doozy.")
print(prob_dist.max())
print(round(prob_dist.prob("pos"), 2))
print(round(prob_dist.prob("neg"), 2))
print(prob_dist.prob("pos"))
print(prob_dist.prob("neg"))

blob = TextBlob("The beer is good. But the hangover is horrible.", classifier=cl)
print(blob.classify())


# Evaluating Classifiers (to compute the accuracy on our test set, use the accuracy(test_data) method).
print(cl.accuracy(test))

# Updating Classifiers with New Data (use the update(new_data) method to update a classifier with new training data).

new_data = [('She is my best friend.', 'pos'),
            ("I'm happy to have a new friend.", 'pos'),
            ('Stay thirsty, my friend.', 'pos'),
            ("He ain't from around here.", 'neg')]

# Echo whatever update() returns, as the original script did.
print(cl.update(new_data))
print(cl.accuracy(test))
开发者ID:saimadhu-polamuri,项目名称:textblob_learn,代码行数:32,代码来源:textblob_classification_system.py


注:本文中的textblob.classifiers.NaiveBayesClassifier.update方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。