This page collects typical usage examples of the Python method classifier.Classifier.predict: what the method does, how to call it, and how it is used in practice. The curated examples below may help; you can also look further into the containing class, classifier.Classifier.
Shown below are 10 code examples of Classifier.predict, drawn from different open-source projects and sorted by popularity by default.
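Because the examples come from different projects, the constructor and the exact signature of predict vary from one example to the next. As rough orientation, most of them follow the shape of the minimal wrapper below (a hypothetical sketch, assuming a pickled scikit-learn estimator as in Example 1; it is not the real class behind any single example):

# Hypothetical sketch of the kind of wrapper these examples assume;
# the real classes differ per project (see each example's constructor).
import pickle

class Classifier(object):
    def __init__(self, model_path):
        # load a previously trained scikit-learn estimator or pipeline
        with open(model_path, "rb") as f:
            self.model = pickle.load(f)

    def train(self, data, labels):
        # fit (or re-fit) the wrapped estimator
        self.model.fit(data, labels)

    def predict(self, data):
        # return one predicted label per input sample
        return self.model.predict(data)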
Example 1: GetNewArticles
# Required import: from classifier import Classifier [as alias]
# Or: from classifier.Classifier import predict [as alias]
def GetNewArticles(request):
    # Get the articles from RSS
    # aggregator = NewsAggregator()
    # list_of_articles = aggregator.feedreader()
    classifier = Classifier("filename.pkl")
    # Predict
    list_of_classes = []
    # with open("articles_dump", "wb") as dump:
    #     pickle.dump(list_of_articles, dump, pickle.HIGHEST_PROTOCOL)
    with open("articles_dump") as dump:
        list_of_articles = pickle.load(dump)
    for article in list_of_articles:
        list_of_classes.append(article["content"])
    # print list_of_classes
    res = classifier.predict(np.asarray(list_of_classes))
    for i in range(0, len(list_of_articles)):
        if res[i] == 1:
            cat = "Sports"
        elif res[i] == 2:
            cat = "Economy_business_finance"
        elif res[i] == 3:
            cat = "Science_technology"
        else:
            cat = "Lifestyle_leisure"
        element = list_of_articles[i]
        list_of_articles[i]["category"] = cat
        article = Article(article_title=element["title"], article_content=element["content"], article_category=cat)
        article.save()
    json_object = json.dumps(list_of_articles)
    return HttpResponse(json_object)
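Example 1 loads a model that was trained and pickled ahead of time as filename.pkl; the original training script is not shown. A minimal sketch of how such a file might be produced, assuming a scikit-learn text pipeline and the label codes used above (1=Sports, 2=Economy_business_finance, ...):

# Hypothetical training script for the pickled model used in Example 1;
# the project's real pipeline and corpus are not shown in the example.
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

train_texts = ["goal scored in the final match", "stocks rallied on Monday"]  # placeholder corpus
train_labels = [1, 2]  # 1=Sports, 2=Economy_business_finance (codes from Example 1)

pipeline = Pipeline([("tfidf", TfidfVectorizer()), ("clf", LinearSVC())])
pipeline.fit(train_texts, train_labels)
with open("filename.pkl", "wb") as f:
    pickle.dump(pipeline, f, pickle.HIGHEST_PROTOCOL)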
Example 2: __init__
# Required import: from classifier import Classifier [as alias]
# Or: from classifier.Classifier import predict [as alias]
class Subscriber:
    def __init__(self, pool_size=10):
        socket.setdefaulttimeout(3)
        self.pool = threadpool.ThreadPool(pool_size)
        self.documents = MongoClient().rss.documents
        self.classifier = Classifier()

    def consume(self, line):
        try:
            feeder = feedparser.parse(line)
            if "title" in feeder.feed.keys():
                site_title = feeder.feed["title"]
            else:
                site_title = u"No title found"
            for entry in feeder.entries:
                doc = {"site_url": line, "site_title": unicode(site_title)}
                for item in [
                    "title",
                    "link",
                    "summary",
                    "content",
                    "published_parsed",
                    "tags",
                    "author",
                    "summary_detail",
                ]:
                    if item in entry.keys():
                        doc[item] = entry[item]
                doc["published_parsed"] = datetime.fromtimestamp(mktime(doc["published_parsed"]))
                if "content" not in doc.keys():
                    doc["content"] = doc["summary"]
                else:
                    doc["content"] = doc["content"][0]["value"]
                print doc["title"].encode("utf8")
                if self.documents.find({"link": doc["link"]}).count() == 0:
                    try:
                        doc["category"] = self.classifier.predict(BeautifulSoup(doc["content"]).text)
                        print doc["category"]
                    except Exception, e:
                        print e
                    self.documents.insert(doc)
                    print doc["title"].encode("utf8")
        except Exception, e:
            print e
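The example defines the consumer but not the code that drives it. A hypothetical driver (not part of the original project) could push a list of feed URLs through the subscriber's thread pool, assuming the classic threadpool package's makeRequests/putRequest API:

# Hypothetical driver for the Subscriber above: fan feed URLs out
# across the worker pool, then wait for all requests to finish.
import threadpool

sub = Subscriber(pool_size=10)
urls = [line.strip() for line in open("feeds.txt")]  # placeholder URL list
for req in threadpool.makeRequests(sub.consume, urls):
    sub.pool.putRequest(req)
sub.pool.wait()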
Example 3: run_iteration
# Required import: from classifier import Classifier [as alias]
# Or: from classifier.Classifier import predict [as alias]
def run_iteration(iteration, hash_map):
    lbp = LocalBinaryPatterns(24, 8)
    data = []
    labels = []
    # Find all images
    images = [os.path.join(root, name) for root, dirs, files in os.walk("../training_images")
              for name in files if name.endswith((".jpeg", ".jpg"))]
    # Split them into training and testing groups
    training, testing = train_test_split(images, test_size=0.25)
    # Training phase
    for imagePath in training:
        # Load the image, convert it to grayscale, and compute LBP
        image = cv2.imread(imagePath)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        if imagePath in hash_map:
            hist = hash_map[imagePath]
        else:
            hist = lbp.compute(gray)
            hash_map[imagePath] = hist
        print str(iteration) + " DEBUG(Training): Computed LBP Histogram for " + imagePath
        # Plot the histogram if needed
        # plt.bar(bin_edges[:-1], hist, width=1)
        # plt.xlim(min(bin_edges), max(bin_edges))
        # plt.show()
        # Extract the label from the image path, then update the label and data lists
        labels.append(imagePath.split("/")[-2])
        data.append(hist)
    # Train the classifier
    classifier = Classifier("SVM")
    print "\n\n" + str(iteration) + " DEBUG: Training Classifier"
    classifier.train(data, labels)
    print "\n\n" + str(iteration) + " DEBUG: Trained Classifier\n\n"
    # Testing phase
    data = []
    labels = []
    for imagePath in testing:
        # Load the image, convert to grayscale, describe it and classify it
        image = cv2.imread(imagePath)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        if imagePath in hash_map:
            hist = hash_map[imagePath]
        else:
            hist = lbp.compute(gray)
            hash_map[imagePath] = hist
        print str(iteration) + " DEBUG(Testing): Computed LBP Histogram for " + imagePath
        data.append(hist)
        labels.append(imagePath.split("/")[-2])
    print "\n\n" + str(iteration) + " DEBUG: Forming predictions"
    predictions = classifier.predict(data)
    counter = 0
    print "\n\n" + str(iteration) + " DEBUG: Printing predictions\n\n"
    for index, prediction in enumerate(predictions):
        print "Name -> " + testing[index] + " Actual -> " + labels[index] + " Prediction -> " + prediction
        if labels[index] == prediction:
            counter = counter + 1
    accuracy = (float(counter) / float(len(predictions))) * 100.0
    print "\n\n" + str(iteration) + " The Classifier Accuracy was " + str(accuracy) + "%"
    return accuracy
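Examples 3 and 4 rely on a LocalBinaryPatterns helper that is not reproduced on this page. A minimal sketch of what LocalBinaryPatterns(24, 8).compute(gray) might look like, assuming the common uniform-LBP recipe built on scikit-image:

# Hypothetical sketch of the LBP descriptor the examples construct as
# LocalBinaryPatterns(24, 8); assumes scikit-image's uniform LBP.
import numpy as np
from skimage.feature import local_binary_pattern

class LocalBinaryPatterns(object):
    def __init__(self, num_points, radius):
        self.num_points = num_points
        self.radius = radius

    def compute(self, gray):
        # uniform LBP yields num_points + 2 distinct pattern labels
        lbp = local_binary_pattern(gray, self.num_points, self.radius, method="uniform")
        hist, _ = np.histogram(lbp.ravel(),
                               bins=np.arange(0, self.num_points + 3),
                               range=(0, self.num_points + 2))
        # normalize the histogram so images of different sizes compare fairly
        hist = hist.astype("float") / (hist.sum() + 1e-7)
        return hist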
Example 4: Classifier
# Required import: from classifier import Classifier [as alias]
# Or: from classifier.Classifier import predict [as alias]
for imagePath in training:
    # Load the image, convert it to grayscale, and compute LBP
    image = cv2.imread(imagePath)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    hist = lbp.compute(gray)
    # Extract the label from the image path, then update the label and data lists
    labels.append(imagePath.split("/")[-2])
    data.append(hist)

# Train the classifier
classifier = Classifier("Chi-Squared")
classifier.train(data, labels)

# Testing phase
data = []
testing = [os.path.join(root, name) for root, dirs, files in os.walk("../testing_images")
           for name in files if name.endswith((".jpeg", ".jpg"))]
for imagePath in testing:
    # Load the image, convert to grayscale, describe it and classify it
    image = cv2.imread(imagePath)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    hist = lbp.compute(gray)
    data.append(hist)

predictions = classifier.predict(data)
for index, prediction in enumerate(predictions):
    print "Name -> " + testing[index] + " Prediction -> " + prediction
Example 5: Classifier
# Required import: from classifier import Classifier [as alias]
# Or: from classifier.Classifier import predict [as alias]
from classifier import Classifier
import kernel
import numpy as np

X = [
    np.array([1, 1]),
    np.array([1, 2]),
    np.array([2, 1]),
    np.array([2, 2]),
    np.array([3, 3]),
    np.array([3, 4]),
    np.array([4, 3]),
    np.array([4, 4])
]
Y = np.array([
    'bottom', 'bottom', 'bottom', 'bottom',
    'top', 'top', 'top', 'top'
])

svm_classifier = Classifier(X, Y)
print svm_classifier.w
print svm_classifier.bias

for x in X:
    print svm_classifier.predict(x)
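The Classifier in Example 5 appears to be a hand-rolled linear SVM that exposes its weight vector w and bias directly. As a sanity check (a sketch, not part of the original project), the same toy data can be cross-checked against scikit-learn's linear SVC:

# Cross-check of Example 5's toy data with scikit-learn's linear SVM;
# coef_ and intercept_ play the roles of w and bias above.
import numpy as np
from sklearn.svm import SVC

X = np.array([[1, 1], [1, 2], [2, 1], [2, 2], [3, 3], [3, 4], [4, 3], [4, 4]])
Y = np.array(['bottom'] * 4 + ['top'] * 4)

clf = SVC(kernel='linear').fit(X, Y)
print(clf.coef_)       # compare with svm_classifier.w
print(clf.intercept_)  # compare with svm_classifier.bias
print(clf.predict(X))  # should separate the lower and upper clusters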
Example 6: int
# Required import: from classifier import Classifier [as alias]
# Or: from classifier.Classifier import predict [as alias]
import sys
from pic import Pic
from classifier import Classifier
from score import error

limit = int(sys.argv[1])
half = limit / 2
data, targets = Pic.data(limit)
data = Pic.flatten(data)
data, cv_data = data[:half], data[half:]
targets, cv_targets = targets[:half], targets[half:]
# Predict on the held-out half; the original snippet passed `data` (the
# training half) here, which mismatched the cv_targets it is scored against.
preds = Classifier.predict(data, targets, cv_data)
print error(preds, cv_targets)
# print preds
# print cv_targets
Example 7: SklearnClassifier
# Required import: from classifier import Classifier [as alias]
# Or: from classifier.Classifier import predict [as alias]
# classifier.test()
i = 0.2
accuracies = []
fscores = []
cs = []
# Sweep the SVM regularization parameter C from 0.2 to 5 in steps of 0.2
while i <= 5:
    c = SklearnClassifier(Pipeline([('clf', LinearSVC(C=i))]))
    classifier = Classifier(c, feature_set)
    classifier.train()
    accuracy, fscore = classifier.test()
    accuracies.append(accuracy)
    fscores.append(fscore)
    cs.append(i)
    i += 0.2
    print i

plt.plot(cs, accuracies, label='Accuracy', linewidth=2)
plt.plot(cs, fscores, label='F1-score', linewidth=2)
plt.xlabel('C')
plt.legend(loc='lower right')
plt.show()

# Interactive loop: classify user-supplied text until an empty line is entered
t = 'a'
while t != '':
    t = raw_input('>')
    if t:
        tags = tag_text(t)
        # Note: the double-underscore prefix makes this method name-mangled,
        # so this call only works from inside the dataset object's own class.
        features = dataset.__convern_to_count_dictionary(tags, n_gram=n)
        classifier.predict(features)
Example 8: Classifier
# Required import: from classifier import Classifier [as alias]
# Or: from classifier.Classifier import predict [as alias]
import pickle
from scipy import io
from scipy.sparse import csr_matrix
import numpy
from classifier import Classifier

c = Classifier('../data/sl_data/value_func_model.bst', '../data/sl_data/value_func_X_encoders.pickle', '../data/sl_data/value_func_cats.pickle', '../data/sl_data/value_func_Y_encoder.pickle', value_function=True)
X = io.mmread('../data/sl_data/value_func_features.csv.mtx')
X = X.tocsc()
print c.predict(X[0:50, :])
if c.value_function:
    # regression-style value function: round the raw predictions
    print numpy.round(c.predict(X[0:50, :]))
else:
    # classification: map the argmax back to the original labels
    print c.target_label_encoder.inverse_transform(numpy.argmax(c.predict(X[0:50, :]), axis=1))
Example 9: partyprograms
# Required import: from classifier import Classifier [as alias]
# Or: from classifier.Classifier import predict [as alias]
def partyprograms(folder='model'):
    clf = Classifier(folder=folder)
    # texts converted with pdftotext
    text = {}
    bow = {}
    # from https://www.spd.de/linkableblob/96686/data/20130415_regierungsprogramm_2013_2017.pdf
    txt = open(folder+'/textdata/SPD_programm.txt').read()
    # remove page footer
    txt = re.sub(r'\W+Das Regierungsprogramm 2013 – 2017\W+\d+\W+','\n',txt)
    # split into sections
    txt = re.split('\n(IX|IV|V?I{0,3}\.\d? )',txt)
    text['spd'] = txt
    # from http://www.cdu.de/sites/default/files/media/dokumente/regierungsprogramm-2013-2017-langfassung-20130911.pdf
    txt = open(folder+'/textdata/CDU_programm.txt').read()
    # remove page footer
    txt = re.sub(r'\W+Gemeinsam erfolgreich für Deutschland | Regierungsprogramm 2013 – 2017\W+','\n',txt)
    # remove page numbers
    txt = re.sub(r'\n\d+\n',' ',txt)
    # split into sections
    txt = re.split(r'\n\d\.\d?\W',txt)
    # drop sections without substantial text
    txt = [t for t in txt if len(t)>1000]
    text['cdu'] = txt
    # from https://www.die-linke.de/fileadmin/download/wahlen2013/bundestagswahlprogramm/bundestagswahlprogramm2013_langfassung.pdf
    txt = open(folder+'/textdata/LINKE_programm.txt').read()
    # remove page numbers
    txt = re.sub(r'\n\d+\n',' ',txt)
    # split into sections
    txt = re.split('\n\n+',txt)
    # drop sections without substantial text
    txt = [t for t in txt if len(t)>1000]
    text['linke'] = txt
    # from http://www.gruene.de/fileadmin/user_upload/Dokumente/Wahlprogramm/Wahlprogramm-barrierefrei.pdf
    txt = open(folder+'/textdata/GRUENE_programm.txt').read()
    # remove page footer
    txt = re.sub(r'(\d+)?\W+Bundestagswahlprogramm 2013\nBündnis 90/Die Grünen\W+\d?\n','\n',txt)
    txt = re.sub(r'Teilhaben. Einmischen. Zukunft schaffen.','',txt)
    txt = re.sub(r'Zeit für den grünen Wandel','',txt)
    # remove page numbers
    txt = re.sub(r'\n\d+\n',' ',txt)
    # split into sections
    txt = re.split(r'\n\d\.\d?\W',txt)
    # drop sections without substantial text
    txt = [t for t in txt if len(t)>1000]
    text['gruene'] = txt
    json.dump(text,open(folder+'/textdata/programs.json', 'wb'),ensure_ascii=False)
    predictions,predictions_total = dict(),dict()
    Ytrue, Yhat = [],[]
    for key in text.keys():
        predictions[key] = []
        # classify each paragraph separately
        for paragraph in text[key]:
            prediction = clf.predict(paragraph)['prediction']
            idx = argmax([x['probability'] for x in prediction])
            Yhat.append(text.keys().index(prediction[idx]['party']))
            predictions[key].append(prediction)
        # predictions[key] = map(lambda x: clf.predict(x)['prediction'],text[key])
        # classify the entire program at once
        predictions_total[key] = clf.predict(' '.join(text[key]))['prediction']
        Ytrue.extend(ones(len(text[key]))*text.keys().index(key))
    print(confusion_matrix(Ytrue,Yhat))
    print(classification_report(Ytrue,Yhat,target_names=text.keys()))
    json.dump(predictions,open(folder+'/textdata/predictions.json','wb'),ensure_ascii=False)
    json.dump(predictions_total,open(folder+'/textdata/predictions_total.json','wb'),ensure_ascii=False)
Example 10: multicategories_predict
# Required import: from classifier import Classifier [as alias]
# Or: from classifier.Classifier import predict [as alias]
def multicategories_predict(samples_test, model_name, result_dir):
    if model_name is None or len(model_name) == 0:
        logging.warn(Logger.warn("model_name must not be NULL."))
        return

    if result_dir is None:
        cfm_file = "%s.cfm" % (model_name)
        sfm_file = "%s.sfm" % (model_name)
    else:
        if not os.path.isdir(result_dir):
            try:
                os.mkdir(result_dir)
            except OSError:
                logging.error(Logger.error("mkdir %s failed." % (result_dir)))
                return
        cfm_file = "%s/%s.cfm" % (result_dir, model_name)
        sfm_file = "%s/%s.sfm" % (result_dir, model_name)

    logging.debug(Logger.debug("Loading train sample feature matrix ..."))
    sfm_train = SampleFeatureMatrix()
    sfm_train.load(sfm_file)

    logging.debug(Logger.debug("Loading train category feature matrix ..."))
    cfm_train = CategoryFeatureMatrix()
    cfm_train.load(cfm_file)

    logging.debug(Logger.debug("Making sample feature matrix for test data ..."))
    category_id = 2000000
    sfm_test = SampleFeatureMatrix(sfm_train.get_category_id_map(), sfm_train.get_feature_id_map())
    features = cfm_train.get_features(category_id)
    for sample_id in samples_test.tsm.sample_matrix():
        (sample_category, sample_terms, term_map) = samples_test.tsm.get_sample_row(sample_id)
        category_1_id = Categories.get_category_1_id(sample_category)
        sfm_test.set_sample_category(sample_id, category_1_id)
        for feature_id in features:
            if feature_id in term_map:
                feature_weight = features[feature_id]
                sfm_test.add_sample_feature(sample_id, feature_id, feature_weight)

    logging.debug(Logger.debug("train sample feature matrix - features:%d categories:%d" % (sfm_train.get_num_features(), sfm_train.get_num_categories())))
    X_train, y_train = sfm_train.to_sklearn_data()

    logging.debug(Logger.debug("test sample feature matrix - features:%d categories:%d" % (sfm_test.get_num_features(), sfm_test.get_num_categories())))
    X_test, y_test = sfm_test.to_sklearn_data()

    clf = Classifier()
    logging.debug(Logger.debug("Classifier training ..."))
    clf.train(X_train, y_train)

    logging.debug(Logger.debug("Classifier predicting ..."))
    categories = samples_test.get_categories()
    categories_1_names = []
    categories_1_idx_map = {}
    categories_1_idlist = categories.get_categories_1_idlist()
    for category_id in categories_1_idlist:
        category_idx = sfm_test.get_category_idx(category_id)
        category_name = categories.get_category_name(category_id)
        categories_1_idx_map[category_idx] = (category_id, category_name)
    categories_1_idx_list = sorted_dict(categories_1_idx_map)
    for (category_idx, (category_id, category_name)) in categories_1_idx_list:
        categories_1_names.append("%s(%d)" % (category_name, category_id))
    clf.predict(X_test, y_test, categories_1_names)