当前位置: 首页>>代码示例>>Python>>正文


Python MultinomialNB.predict_proba方法代码示例

本文整理汇总了Python中sklearn.naive_bayes.MultinomialNB.predict_proba方法的典型用法代码示例。如果您正苦于以下问题:Python MultinomialNB.predict_proba方法的具体用法?Python MultinomialNB.predict_proba怎么用?Python MultinomialNB.predict_proba使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.naive_bayes.MultinomialNB的用法示例。


在下文中一共展示了MultinomialNB.predict_proba方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: MultinomialNaiveBayesGridSearch_OLD

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def MultinomialNaiveBayesGridSearch_OLD():
    """Grid-search MultinomialNB's smoothing parameter alpha by test-set AUC.

    Relies on module-level data (f_train, y_train, f_test, y_test) and the
    plotting helper module `st`.  Overlays one ROC curve per candidate alpha,
    then refits with the best alpha, plots its ROC curve with a legend, and
    returns the refit classifier.
    """
    # Candidate smoothing values, log-spaced from 1e-9 up to ~3e1.
    cs = 10.0**np.arange(-9, 2, 0.5)
    aucs = []
    for c in cs:
        clf = MultinomialNB(alpha=c).fit(f_train, y_train)
        probs = clf.predict_proba(f_test)
        fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs[:, 1])
        roc_auc = auc(fpr, tpr)
        # Overlay this candidate's curve on the shared figure (figure=False).
        myplt = st.plotROC(fpr, tpr, roc_auc,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=False,
                    title='Grid Search - Multinomial Naive Bayes ROC Curve')
        aucs.append(roc_auc)
    # Pick the alpha with the highest AUC.
    best = int(np.argmax(aucs))
    c = cs[best]
    clf = MultinomialNB(alpha=c).fit(f_train, y_train)
    probs = clf.predict_proba(f_test)
    fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs[:, 1])
    # BUG FIX: recompute the AUC for the refit best model; the original
    # reused the stale roc_auc left over from the last grid iteration.
    roc_auc = auc(fpr, tpr)
    myplt = st.plotROC(fpr, tpr, roc_auc,
                    legendlabel='Best alpha = %0.2e' % c,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=True,
                    title='Grid Search - Multinomial Naive Bayes ROC Curve')
    myplt.show()
    return clf
开发者ID:charlesdguthrie,项目名称:frontrow,代码行数:36,代码来源:working_JMJ2.py

示例2: recommend

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def recommend(twitterword):
    """Return the estimated percent chance (0-100) that `twitterword`
    would be retweeted, according to a MultinomialNB bag-of-words model
    trained on the tweet corpus from get_words_df().
    """
    newpd = get_words_df()
    newpd['Tweet'] = newpd['Tweet'].map(lambda x: str(x))

    # Binary target: whether the tweet beat the average retweet threshold.
    newpd['was_retweeted'] = newpd['average_retweet_threshold']

    # Hyperparameters found by a previous grid search.
    best_alpha = 50.0
    best_min_df = 0.01

    vectorizer = CountVectorizer(min_df=best_min_df)
    x, y = make_xy(newpd, vectorizer)
    xtrain, _, ytrain, _ = train_test_split(x, y)

    clf = MultinomialNB(alpha=best_alpha).fit(xtrain, ytrain)

    # NOTE: the original also computed predict_log_proba / predict_proba /
    # predict over the full corpus here, but never used the results;
    # that dead work has been removed.
    retweet_chance = clf.predict_proba(vectorizer.transform([twitterword]))

    # Column 1 is the probability of the positive ("was retweeted") class.
    answer = retweet_chance[0][1] * 100
    return answer
开发者ID:2dpodcast,项目名称:cs109_twitterapp,代码行数:27,代码来源:twitterword.py

示例3: test_mnnb

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def test_mnnb(kind):
    # Test Multinomial Naive Bayes classification.
    # This checks that MultinomialNB implements fit and predict and returns
    # correct values for a simple toy dataset.

    if kind == 'dense':
        X = X2
    elif kind == 'sparse':
        X = scipy.sparse.csr_matrix(X2)

    # Check the ability to predict the learning set.
    clf = MultinomialNB()
    assert_raises(ValueError, clf.fit, -X, y2)
    y_pred = clf.fit(X, y2).predict(X)

    assert_array_equal(y_pred, y2)

    # Verify that np.log(clf.predict_proba(X)) gives the same results as
    # clf.predict_log_proba(X)
    y_pred_proba = clf.predict_proba(X)
    y_pred_log_proba = clf.predict_log_proba(X)
    assert_array_almost_equal(np.log(y_pred_proba), y_pred_log_proba, 8)

    # Check that incremental fitting yields the same results
    clf2 = MultinomialNB()
    clf2.partial_fit(X[:2], y2[:2], classes=np.unique(y2))
    clf2.partial_fit(X[2:5], y2[2:5])
    clf2.partial_fit(X[5:], y2[5:])

    y_pred2 = clf2.predict(X)
    assert_array_equal(y_pred2, y2)

    y_pred_proba2 = clf2.predict_proba(X)
    y_pred_log_proba2 = clf2.predict_log_proba(X)
    assert_array_almost_equal(np.log(y_pred_proba2), y_pred_log_proba2, 8)
    assert_array_almost_equal(y_pred_proba2, y_pred_proba)
    assert_array_almost_equal(y_pred_log_proba2, y_pred_log_proba)

    # Partial fit on the whole data at once should be the same as fit too
    clf3 = MultinomialNB()
    clf3.partial_fit(X, y2, classes=np.unique(y2))

    y_pred3 = clf3.predict(X)
    assert_array_equal(y_pred3, y2)
    y_pred_proba3 = clf3.predict_proba(X)
    y_pred_log_proba3 = clf3.predict_log_proba(X)
    assert_array_almost_equal(np.log(y_pred_proba3), y_pred_log_proba3, 8)
    assert_array_almost_equal(y_pred_proba3, y_pred_proba)
    assert_array_almost_equal(y_pred_log_proba3, y_pred_log_proba)
开发者ID:AlexisMignon,项目名称:scikit-learn,代码行数:51,代码来源:test_naive_bayes.py

示例4: self_training

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def self_training(attribute,iterate_count,initial_data_count,new_data_count):
    """Semi-supervised self-training with MultinomialNB (Python 2 code).

    Each round: fit on the labeled pool, report test-set accuracy, predict
    class probabilities for the unlabeled pool, then promote the confidently
    predicted examples (selected by extract_new_data) into the labeled pool.
    Stops after iterate_count rounds or when no confident data remains.
    """
    from data_constructor import construct
    print ''

    # Build the initial labeled / unlabeled / test splits for this attribute.
    construct(attribute,initial_data_count)
    unlabel_train_x,unlabel_train_y,unlabel_train_uids=get_data(attribute,'train_unlabel')
    train_x,train_y,train_uids=get_data(attribute,'train')
    test_x,test_y,_=get_data(attribute,'test')

    scores=[]  # accuracy per iteration, printed at the end
    for i in xrange(iterate_count):
        print '----------------'
        print 'Iterate: %d'%i
        print 'Labeled training data size: %d'%(len(train_x))
        print 'Unlabeled training data size: %d'%(len(unlabel_train_x))
        print 'Testing data size: %d'%(len(test_x))
        clf=MultinomialNB()
        clf.fit(train_x,train_y)
        score=clf.score(test_x,test_y)
        print 'Accurate: %0.4f'%score
        scores.append(score)
        # Class probabilities for the unlabeled pool drive self-labeling.
        result=clf.predict_proba(unlabel_train_x)
        # good_* = confidently self-labeled examples to add to training;
        # bad_* = the remainder, kept as the new unlabeled pool.
        good_x,good_y,bad_x,bad_y=extract_new_data(zip(unlabel_train_x,result),new_data_count)
        if len(good_x)==0:
            print 'No more new train data!'
            break
        print 'New training data size: %d'%(len(good_x))
        train_x=numpy.concatenate((train_x, good_x), axis=0)
        train_y=numpy.concatenate((train_y, good_y), axis=0)
        unlabel_train_x,unlabel_train_y=bad_x,bad_y
    print '--------'
    for s in scores:
        print s
    print '--------'
开发者ID:Adoni,项目名称:JD_Profiling,代码行数:36,代码来源:learn.py

示例5: bag_of_words_probabilities

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def bag_of_words_probabilities(train_reviews, test_reviews):
    """Baseline bag-of-words classifier.

    Fits a MultinomialNB on the training review texts and returns a dict
    mapping each test review_id to the estimated probability that the
    review belongs to the positive (second) class.
    """
    train_corpus = [train_reviews[rid]["text"] for rid in train_reviews]
    Y_train = [train_reviews[rid]["rating"] for rid in train_reviews]

    vectorizer = CountVectorizer(stop_words = 'english')
    X_train = vectorizer.fit_transform(train_corpus)

    test_ids = test_reviews.keys()
    test_corpus = [test_reviews[rid]["text"] for rid in test_ids]

    # Alternative classifiers, kept for reference:
    # clf = LinearSVC(class_weight = 'auto').fit(X_train, Y_train)
    # clf = LogisticRegression().fit(X_train, Y_train)
    clf = MultinomialNB().fit(X_train, Y_train)

    X_test = vectorizer.transform(test_corpus)
    Y_probability = clf.predict_proba(X_test)

    # Column index 1 is the positive-class probability for each test review.
    probability_dict = {}
    for rid, row in zip(test_ids, Y_probability):
        probability_dict[rid] = row[1]

    return probability_dict
开发者ID:Milstein,项目名称:yelp_social_sentiment,代码行数:32,代码来源:baselineclassifier.py

示例6: __init__

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
class RecommenderNB:
	"""Hashtag recommender backed by a MultinomialNB classifier trained on
	per-hashtag tweet corpora (Python 2 code; uses project class TweetLib)."""

	# Minimum class probability a prediction must reach before any hashtag
	# is recommended at all; set in __init__ from num_hashtags.
	min_score = None
	# Hand-rolled English stop-word list used to pre-filter tweet tokens
	# before classification (the vectorizer itself also uses 'english').
	stop_words = ["a","a's","able","about","above","according","accordingly","across","actually","after","afterwards","again","against","ain't","all","allow","allows","almost","alone","along","already","also","although","always","am","among","amongst","an","and","another","any","anybody","anyhow","anyone","anything","anyway","anyways","anywhere","apart","appear","appreciate","appropriate","are","aren't","around","as","aside","ask","asking","associated","at","available","away","awfully","b","be","became","because","become","becomes","becoming","been","before","beforehand","behind","being","believe","below","beside","besides","best","better","between","beyond","both","brief","but","by","c","c'mon","c's","came","can","can't","cannot","cant","cause","causes","certain","certainly","changes","clearly","co","com","come","comes","concerning","consequently","consider","considering","contain","containing","contains","corresponding","could","couldn't","course","currently","d","definitely","described","despite","did","didn't","different","do","does","doesn't","doing","don't","done","down","downwards","during","e","each","edu","eg","eight","either","else","elsewhere","enough","entirely","especially","et","etc","even","ever","every","everybody","everyone","everything","everywhere","ex","exactly","example","except","f","far","few","fifth","first","five","followed","following","follows","for","former","formerly","forth","four","from","further","furthermore","g","get","gets","getting","given","gives","go","goes","going","gone","got","gotten","greetings","h","had","hadn't","happens","hardly","has","hasn't","have","haven't","having","he","he's","hello","help","hence","her","here","here's","hereafter","hereby","herein","hereupon","hers","herself","hi","him","himself","his","hither","hopefully","how","howbeit","however","i","i'd","i'll","i'm","i've","ie","if","ignored","immediate","in","inasmuch","inc","indeed","indicate","indicated","indicates","inner","insofar","instead","into","inward",
	"is","isn't","it","it'd","it'll","it's","its","itself","j","just","k","keep","keeps","kept","know","knows","known","l","last","lately","later","latter","latterly","least","less","lest","let","let's","like","liked","likely","little","look","looking","looks","ltd","m","mainly","many","may","maybe","me","mean","meanwhile","merely","might","more","moreover","most","mostly","much","must","my","myself","n","name","namely","nd","near","nearly","necessary","need","needs","neither","never","nevertheless","new","next","nine","no","nobody","non","none","noone","nor","normally","not","nothing","novel","now","nowhere","o","obviously","of","off","often","oh","ok","okay","old","on","once","one","ones","only","onto","or","other","others","otherwise","ought","our","ours","ourselves","out","outside","over","overall","own","p","particular","particularly","per","perhaps","placed","please","plus","possible","presumably","probably","provides","q","que","quite","qv","r","rather","rd","re","really","reasonably","regarding","regardless","regards","relatively","respectively","right","s","said","same","saw","say","saying","says","second","secondly","see","seeing","seem","seemed","seeming","seems","seen","self","selves","sensible","sent","serious","seriously","seven","several","shall","she","should","shouldn't","since","six","so","some","somebody","somehow","someone","something","sometime","sometimes","somewhat","somewhere","soon","sorry","specified","specify","specifying","still","sub","such","sup","sure","t","t's","take","taken","tell","tends","th","than","thank","thanks","thanx","that","that's","thats","the","their","theirs","them","themselves","then","thence","there","there's","thereafter","thereby","therefore","therein","theres","thereupon","these","they","they'd","they'll","they're","they've","think","third","this","thorough","thoroughly","those","though","three","through","throughout","thru","thus","to","together","too","took","toward","towards","tried","tries","truly","try","trying","twice","two","u","un","under","unfortunately","unless","unlikely","until","unto","up","upon","us","use","used","useful","uses","using","usually","uucp","v","value","various","very","via","viz","vs","w","want","wants","was","wasn't","way","we","we'd","we'll","we're","we've","welcome","well","went","were","weren't","what","what's","whatever","when","whence","whenever","where","where's","whereafter","whereas","whereby","wherein","whereupon","wherever","whether","which","while","whither","who","who's","whoever","whole","whom","whose","why","will","willing","wish","with","within","without","won't","wonder","would","would","wouldn't","x","y","yes","yet","you","you'd","you'll","you're","you've","your","yours","yourself","yourselves"]
	
	def __init__(self, num_hashtags=40):
		# Threshold a prediction must beat: 1/(num_hashtags - 1).
		RecommenderNB.min_score = float(1/(float(num_hashtags)-1.0))
		self.tl = TweetLib()
		print "Generating classifier ... "
		# documents is a list of (hashtag, concatenated-tweet-text) pairs.
		documents = self.tl.get_hashtag_documents(num_hashtags)
		corpus = [b for a, b in documents]
		self.hashtags = [a for a,b in documents]
		# One class per hashtag document, labeled by index.
		all_classes = range(len(documents))
		self.vectorizer = TfidfVectorizer(stop_words='english')
		self.xtrain = self.vectorizer.fit_transform(corpus)
		self.ytrain = all_classes
		self.parameters = {'alpha': 0.01}
		# NOTE(review): the third positional argument of partial_fit is
		# `classes`; passing self.ytrain works only because ytrain equals
		# all_classes (= range(len(documents))) here — confirm intent.
		self.clf = MultinomialNB(**self.parameters).partial_fit(self.xtrain, self.ytrain, self.ytrain)
		print "Classifier has been generated..."

	def recommend(self, tweet):
		"""Return hashtags ranked most-likely-first for `tweet`, or None
		when the top class probability is below min_score."""
		# Lowercase and drop stop words before vectorizing.
		tweet = " ".join([w.lower() for w in tweet.split() if not w.lower() in RecommenderNB.stop_words])
		xtest = self.vectorizer.transform([tweet])
		pred = self.clf.predict_proba(xtest)[0]
		# Sort (class_index, probability) pairs by ascending probability.
		sorted_pred = sorted(enumerate(pred), key=lambda x:x[1])
		max_score = max([b for a,b in sorted_pred])
		if max_score < RecommenderNB.min_score:
			return None
		else:
			# Reverse so the most probable hashtag comes first.
			return list(reversed([self.hashtags[i[0]] for i in sorted_pred]))
开发者ID:ben444422,项目名称:Hashtag-Recommender-App,代码行数:31,代码来源:NaiveBayes_production.py

示例7: predict

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def predict(cur, plyr_id, game_plyrs): 
  """Predict an expected value for a player using a Multinomial NB model.

  Builds a games x players indicator training matrix for plyr_id, fits
  MultinomialNB over scoring buckets with per-game sample weights, and
  returns the expected value over the bucket averages, rounded to one
  decimal.  Returns 0 when there is no training data.
  """
  #creates training set (called 'X') for plyr
  all_plyrs = all_player_ids(cur) #np.array - all NFL players (and coaches)
  games = games_played_in(cur, plyr_id) #np.array - the games_ids the player played in
  n_cols = all_plyrs.shape[0] #int 
  m_rows = games.shape[0] #int
  w = weights(games)
  zeros = np.zeros((m_rows, n_cols)) #2darr - used to initialize DF
  X = pd.DataFrame(zeros, index=games, columns=all_plyrs) #dataframe
  populate_training_set(cur, X, games, plyr_id)

  #creates vector of known output values
  Y = training_output_vector(cur, games, plyr_id)
  test_zeros = np.zeros((1, n_cols)) #2darr - single test row
  # BUG FIX: the test frame must be built from the 1-row test_zeros; the
  # original reused the m_rows-tall training `zeros` array here.
  test_X = pd.DataFrame(test_zeros, columns=all_plyrs) #dataframe
  update_training_matrix(cur, game_plyrs, 0, test_X)

  #run Multinomial NB Classifier (the original comment said Bernoulli)
  nb_clf = MultinomialNB()

  if len(X.values) == 0:
    return 0
  nb_clf.fit(X, Y, sample_weight=w)
  nb_norm_prob = normalize_probs(nb_clf.predict_proba(test_X)[0])
  avgs = [3,8,12.5,17,21,25] # average points per scoring bucket
  ev = expected_val(nb_norm_prob, avgs) #can also calc dot product
  return round(ev,1)
开发者ID:kwheeler27,项目名称:insight_datasci,代码行数:36,代码来源:mn_predictions.py

示例8: MultinomialNBClassify_Proba

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def MultinomialNBClassify_Proba(enrollment_id, trainData, trainLabel, testData):
    """Fit a MultinomialNB on the training data, save the positive-class
    probabilities for the test data via saveResult, and return them."""
    # Default alpha=1.0 is Laplace smoothing; setting alpha < 1 would be
    # Lidstone smoothing.
    classifier = MultinomialNB()
    classifier.fit(trainData, ravel(trainLabel))
    # Keep only P(class == 1) for each test sample.
    positive_probs = classifier.predict_proba(testData)[:,1]
    saveResult(enrollment_id, positive_probs, 'Proba_sklearn_MultinomialNB_alpha=0.1_Result.csv')
    return positive_probs
开发者ID:ElvisKwok,项目名称:code,代码行数:9,代码来源:test.py

示例9: NaiveBayesClassifier

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
class NaiveBayesClassifier():
    tfidf_transformer = TfidfTransformer(norm = None)
    
    def __init__(self, keywords_path, dataset_path):
        print 'Initializing NaiveBayesClassifier..'
        self.data_collector = NaiveBayesDataCollector(keywords_path, dataset_path)
        
        self.keywords = self.data_collector.keywords
        self.documents = self.data_collector.documents
        self.target_classes = self.data_collector.target_classes
        
        self.count_vectorizer = CountVectorizer(min_df=1, tokenizer=tokenize, vocabulary = self.keywords)
    
    def train(self):
        print 'Training Naive Bayes..'
        
        print 'Running Count Vectorizer..'
        X_train_counts = self.count_vectorizer.fit_transform(self.documents)
        
#        print 'Headers:', self.count_vectorizer.get_feature_names()
#        print 'X_train_counts:\n', X_train_counts.toarray()
#        print 'count_vect.vocabulary_:', self.count_vectorizer.vocabulary_
        
        print 'Performing tf-idf transform..'
        
        X_train_tfidf = self.tfidf_transformer.fit_transform(X_train_counts)
        #print 'X_train_tfidf.shape:', X_train_tfidf.shape
#        print 'X_train_tfidf:\n', X_train_tfidf.toarray()
        
        self.clf = MultinomialNB(fit_prior=False).fit(X_train_tfidf, self.target_classes)
        
    def classify(self, param):
        if isinstance(param, list):
            docs_new = param
        else:
            docs_new = [param]
        X_new_counts = self.count_vectorizer.transform(docs_new)
#        print 'X_new_counts:', X_new_counts
        X_new_tfidf = self.tfidf_transformer.transform(X_new_counts)
        
        predicted = self.clf.predict(X_new_tfidf)
        predicted_prob = self.clf.predict_proba(X_new_tfidf)
        
        """
        print
        print 'Prediction:'
        for doc, category in zip(docs_new, predicted):
            print '%r => %s' % (doc, category)
        
        print
        for doc, prob in zip(docs_new, predicted_prob):
            print '%r => %s' % (doc, prob)
        """
        return_val = []
        for row in predicted_prob:
            prob_data = {}
            for prob, category in zip(row, self.clf.classes_):
               prob_data[category] = prob
            return_val.append(prob_data)
        return return_val
开发者ID:c0ns0le,项目名称:gitbook,代码行数:62,代码来源:nb_classifier.py

示例10: classifyNaiveBayes

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def classifyNaiveBayes(Xtr, ytr, Xte, yte, reduceDim="none", targetDim=0):
    """Classify data using Multinomial Naive Bayes.

    Returns (accuracyRate, timing, probabilities, predicted); on error the
    defaults (0.0, 0.0, [], []) are returned after logging the exception.
    """
    try:
        # BUG FIX: initialize `predicted` too — the original's return line
        # raised NameError (masking the real error) whenever an exception
        # fired before nbClassifier.predict() ran.
        accuracyRate, timing, probabilities, predicted = 0.0, 0.0, [], []
        # Reduce dimensionality if requested
        Xtr = reduceDimensionality(Xtr, ytr, reduceDim, targetDim) if reduceDim != "none" else Xtr
        Xte = reduceDimensionality(Xte, yte, reduceDim, targetDim) if reduceDim != "none" else Xte
        # Make sure values are positive because MultinomialNB doesn't take negative features
        Xtr = flipSign(Xtr, "+")
        Xte = flipSign(Xte, "+")
        # Perform classification
        nbClassifier = MultinomialNB()
        prettyPrint("Training the Naive Bayes algorithm", "debug")
        startTime = time.time()
        nbClassifier.fit(numpy.array(Xtr), numpy.array(ytr))
        # Now test the trained algorithm
        prettyPrint("Submitting the test samples", "debug")
        predicted = nbClassifier.predict(Xte)
        endTime = time.time()
        # Compare the predicted and ground truth
        accuracyRate = round(metrics.accuracy_score(predicted, yte), 2)
        probabilities = nbClassifier.predict_proba(Xte)
        # Finally, calculate the time taken to train and classify
        timing = endTime-startTime

    except Exception as e:
        prettyPrint("Error encountered in \"classifyNaiveBayes\": %s" % e, "error")

    return accuracyRate, timing, probabilities, predicted
开发者ID:tum-i22,项目名称:Oedipus,代码行数:31,代码来源:classification.py

示例11: main

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def main():
  """Train MultinomialNB on 3/5 of the training rows, predict class
  probabilities for the test set and for the held-out 2/5 of the training
  rows, and write both prediction CSVs.
  """
  # PERF FIX: load each CSV from disk once and slice, instead of calling
  # genfromtxt up to five times per file as the original did.
  features = genfromtxt('trainFeatureWithCounting.csv', delimiter=',')
  labels = genfromtxt('trainLabel.csv', delimiter='\n')

  # 3/5 train: every 5th row starting at offsets 0, 2 and 4.
  trainFeature = np.concatenate((features[0::5], features[2::5], features[4::5]))
  trainLabel = np.concatenate((labels[0::5], labels[2::5], labels[4::5]))

  # 2/5 of the train set held out for evaluation: offsets 1 and 3.
  trainFeature_test = np.concatenate((features[1::5], features[3::5]))
  trainLabel_test = np.concatenate((labels[1::5], labels[3::5]))

  # testset
  testFeature = genfromtxt('testFeatureWithCounting.csv', delimiter=',')

  clf = MultinomialNB()
  clf.fit(trainFeature, trainLabel)
  # (removed a stray no-op `MultinomialNB(alpha=1.0, ...)` expression whose
  # result the original discarded)
  header = "Id,ARSON,ASSAULT,BAD CHECKS,BRIBERY,BURGLARY,DISORDERLY CONDUCT,DRIVING UNDER THE INFLUENCE,DRUG/NARCOTIC,DRUNKENNESS,EMBEZZLEMENT,EXTORTION,FAMILY OFFENSES,FORGERY/COUNTERFEITING,FRAUD,GAMBLING,KIDNAPPING,LARCENY/THEFT,LIQUOR LAWS,LOITERING,MISSING PERSON,NON-CRIMINAL,OTHER OFFENSES,PORNOGRAPHY/OBSCENE MAT,PROSTITUTION,RECOVERED VEHICLE,ROBBERY,RUNAWAY,SECONDARY CODES,SEX OFFENSES FORCIBLE,SEX OFFENSES NON FORCIBLE,STOLEN PROPERTY,SUICIDE,SUSPICIOUS OCC,TREA,TRESPASS,VANDALISM,VEHICLE THEFT,WARRANTS,WEAPON LAWS"

  #testset
  dec1 = clf.predict_proba(testFeature)
  #trainset to test
  dec2 = clf.predict_proba(trainFeature_test)

  # One integer Id column followed by a 4-decimal probability per class.
  fmt1=['%d'] + ['%1.4f'] * dec1.shape[1]
  fmt2=['%d'] + ['%1.4f'] * dec2.shape[1]

  # Prepend a 0-based row-id column before saving.
  dec1 = insert(dec1, 0, range(len(dec1)), axis=1)
  savetxt("predict_NaiveBayes_96_testset.csv", dec1, delimiter=",", header=header, fmt=fmt1, comments="")

  dec2 = insert(dec2, 0, range(len(dec2)), axis=1)
  savetxt("predict_NaiveBayes_96_trainset_to_test.csv", dec2, delimiter=",", header=header, fmt=fmt2, comments="")
开发者ID:applerman,项目名称:criminal_analysis,代码行数:49,代码来源:naiveBayesTrain_ensemble.py

示例12: WeightedPartialFitPassiveTransferClassifier

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
class WeightedPartialFitPassiveTransferClassifier(object):
  """Transfer-learning wrapper around MultinomialNB (Python 2 code).

  Trains on unambiguous source annotations with a full fit, then folds in
  ambiguous target annotations via partial_fit with a fixed per-sample
  weight, and restricts predictions to each annotation's candidate groups.
  """

  def __init__(self, target_weight):
    self.classifier = MultinomialNB()
    # Sample weight applied to every target-domain example in partial_fit.
    self.target_weight = target_weight
    self.vectorizer = FullContextBagOfWordsLeftRightCutoff(9)

  # Train on unambiguous annotatios which have a group number
  def train_source(self, annotations):
    X = self.vectorizer.fit_transform(annotations)
    y = numpy.array([annotation.get_group_number() for annotation in annotations])

    self.classifier.fit(X, y)

  # Train on ambiguous annotations with according group labels
  def train_target_online(self, annotations, labels):
    X = self.vectorizer.transform(annotations)
    y = numpy.array([Annotation.GROUP_MAPPING[label] for label in labels])

    # Every target example gets the same (configurable) sample weight.
    weight_vector = [self.target_weight] * len(annotations)
    self.classifier.partial_fit(X, y, Annotation.GROUP_MAPPING.values(), weight_vector)

  def get_group_number_prob_pair(self, annotation, prob_vector):
    """Return (group_index, prob) of the most probable candidate group.

    Only the annotation's own candidate groups are considered, not all
    classes.  Uses a Python 2 tuple-unpacking lambda.
    """
    group_option_indices = annotation.get_group_number()
    group_option_prob = [prob_vector[group_option_index] for group_option_index in group_option_indices]
    return max(zip(group_option_indices, group_option_prob), key = lambda (index, prob): prob)
 
  def get_group_number(self, annotation, prob_vector):
    # Just the index half of the best (index, prob) pair.
    group_index, _ = self.get_group_number_prob_pair(annotation, prob_vector)
    return group_index

  # tested, results for the classifier trained on source are not random
  def predict(self, annotations):
    X = self.vectorizer.transform(annotations)
    probs = self.classifier.predict_proba(X) # [n_samples, n_classes]
    return numpy.array([self.get_group_number(annotation, row)
     for row, annotation in itertools.izip(probs, annotations)])

  # tested, results for the classifier trained on source are not random
  def get_max_probability(self, annotation, prob_vector):
    # Just the probability half of the best (index, prob) pair.
    _, prob = self.get_group_number_prob_pair(annotation, prob_vector)
    return prob

  def get_prob_estimates(self, annotations):
    """Per-annotation confidence: probability of its best candidate group."""
    X = self.vectorizer.transform(annotations)
    probs = self.classifier.predict_proba(X)
    return numpy.array([self.get_max_probability(annotation, row)
      for row, annotation in itertools.izip(probs, annotations)])
开发者ID:martinthenext,项目名称:eth_ml,代码行数:49,代码来源:transfer.py

示例13: __init__

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
class Classifier:
    """Used to allow the adding and removing of speeches to the classifier.

    This could be made faster by actually modifying or extending the
    MultinomialNB in scikit-learn rather than creating a new MultinomialNB
    object each time.
    """

    def __init__(self, vocab=None):
        # min_df=2 drops terms appearing in fewer than two documents.
        self.vectorizer = TfidfVectorizer(min_df=2, vocabulary=vocab)
        self.classifier = MultinomialNB(alpha=0.1, fit_prior=True)

    def train_classifier(self, data, target):
        """Fit the vectorizer and the NB model on the given corpus."""
        sparse_data = self.vectorizer.fit_transform(data)
        app.logger.debug("training classifier")
        self.classifier.fit(sparse_data, target)

    def classify_document(self, document):
        """Return the class-probability vector for a single document."""
        app.logger.debug("classifying document")
        tfidf_frames_vector = self.vectorizer.transform([document])
        return self.classifier.predict_proba(tfidf_frames_vector)[0]

    def cross_validation(self, documents, targets):
        """
        Instantiate a new classifier and run this function.
        Do not run train_classifier
        """
        # BUG FIX: vectorizer.fit() returns the fitted vectorizer itself,
        # not the document-term matrix; cross_val_score needs the
        # transformed feature matrix, so use fit_transform here.
        X = self.vectorizer.fit_transform(documents)
        y = targets
        return cross_val_score(self.classifier, X, y, cv=5)

    @staticmethod
    def bunch_with_targets(speeches, target_function):
        '''This function is an alternative form of the loads in sklearn which
        loads from a particular file structure. This function allows me to
        load from the database.
        '''
        app.logger.debug('Building bunch containing data and target vector.')

        target = [] # 0 and 1 for subgroup a and b respectively
        target_names = ['a','b'] # target_names
        data = [] # data

        for speech in speeches:
            target.append(target_function(speech))
            # Concatenate all of the speech's sentences into one document.
            speech_string = ''
            for sentence in speech.speaking:
                speech_string += sentence
            data.append(speech_string)

        DESCR = "Trained subgroup_a vs subgroup_b classifier"

        # Bunch - https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/datasets/base.py
        return Bunch(
            target = target,
            target_names = target_names,
            data = data,
            DESCR = DESCR
        )

示例14: NaiveBayesModel

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
class NaiveBayesModel(BaseModel):
    """Model wrapper exposing MultinomialNB positive-class probabilities."""

    def __init__(self, cached_feature):
        BaseModel.__init__(self, cached_feature)
        # Light smoothing (alpha=0.01); class priors learned from the data.
        self.model = MultinomialNB(alpha=0.01, fit_prior=True)

    def _predict_internal(self, X_test):
        # Column 1 of predict_proba holds P(class == 1) for each row.
        probabilities = self.model.predict_proba(X_test)
        return probabilities[:, 1]
开发者ID:sjuvekar,项目名称:Kaggle-Dato,代码行数:10,代码来源:naive_bayes_model.py

示例15: MultinomialNB_pred

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def MultinomialNB_pred(X_train, X_test, y_train):
    """Fit a MultinomialNB and return click probabilities.

    Returns (predictions_click, predictions_train_click): the predicted
    probability of the click class (column 1 of predict_proba) for the
    test set and the training set respectively, as plain lists.
    """
    clf = MultinomialNB(alpha=0.1, fit_prior=True)
    clf = clf.fit(X_train, y_train)

    # predict_proba yields one column per class (non-click, click); slice
    # out column 1 instead of looping and appending row by row.
    predictions_click = list(clf.predict_proba(X_test)[:, 1])
    predictions_train_click = list(clf.predict_proba(X_train)[:, 1])

    return predictions_click, predictions_train_click
开发者ID:poddar,项目名称:predictive,代码行数:19,代码来源:naive_bayes_noFS.py


注:本文中的sklearn.naive_bayes.MultinomialNB.predict_proba方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。