This article collects typical usage examples of the Python method bigml.api.BigML.create_source: what the method does and how to call it. The curated code examples below may help, and you can also explore the containing class bigml.api.BigML
for more context.
The following 15 code examples of BigML.create_source are shown, sorted by popularity by default.
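All 15 examples share the same basic pattern: create a source from a local file, wait for it to become ready, then build downstream resources from it. Here is a minimal sketch of that pattern (iris.csv is a placeholder file name; credentials are read from the BIGML_USERNAME and BIGML_API_KEY environment variables):
from bigml.api import BigML

api = BigML()  # picks up BIGML_USERNAME / BIGML_API_KEY from the environment
source = api.create_source("iris.csv", {"name": "my source"})
api.ok(source)  # poll until the source is ready
dataset = api.create_dataset(source)
api.ok(dataset)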
Example 1: bigml
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_source [as alias]
import time
from timeit import default_timer as timer

from bigml.api import BigML


def bigml(train_csv, test_csv, result_csv):
    api = BigML(dev_mode=True)
    # train model
    print('Training model.')
    start_training = timer()
    source_train = api.create_source(train_csv)
    dataset_train = api.create_dataset(source_train)
    model = api.create_model(dataset_train)
    end_training = timer()
    print('Training took %i seconds.' % (end_training - start_training))
    # test model
    start_test = timer()
    source_test = api.create_source(test_csv)
    dataset_test = api.create_dataset(source_test)
    batch_prediction = api.create_batch_prediction(
        model,
        dataset_test,
        {
            "name": "census prediction",
            "all_fields": True,
            "header": False,
            "confidence": False
        }
    )
    # wait until batch processing is finished
    progress = api.get_batch_prediction(batch_prediction)['object']['status']['progress']
    while progress != 1:
        print(progress)
        time.sleep(1)
        progress = api.get_batch_prediction(batch_prediction)['object']['status']['progress']
    end_test = timer()
    print('Testing took %i seconds.' % (end_test - start_test))
    api.download_batch_prediction(batch_prediction['resource'], filename=result_csv)
    # cleanup
    api.delete_source(source_train)
    api.delete_source(source_test)
    api.delete_dataset(dataset_train)
    api.delete_dataset(dataset_test)
    api.delete_model(model)
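The manual polling loop above is usually unnecessary: the bindings ship a blocking wait helper. A minimal sketch against the batch prediction created above:
api.ok(batch_prediction)  # blocks until the resource finishes; returns False on a faulty state
api.ok also refreshes batch_prediction in place, so the subsequent download call can use it directly.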
Example 2: BigMLAPIMixIn
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_source [as alias]
from bigml.api import BigML
from scrapy.exceptions import NotConfigured


class BigMLAPIMixIn(object):

    BIGML_AUTH_ERRMSG = (
        "{errtype:s} BigML credentials. Please supply "
        "BIGML_USERNAME and BIGML_API_KEY as either Scrapy "
        "settings or environment variables."
    )

    # XXX: This should get a method to read BigML configuration from settings
    def get_bigml_api(self, *args, **kwargs):
        try:
            self.bigml = BigML(*args, **kwargs)
        except AttributeError:
            raise NotConfigured(self.BIGML_AUTH_ERRMSG.format(errtype="Missing"))
        if not self.check_bigml_auth():
            raise NotConfigured(self.BIGML_AUTH_ERRMSG.format(errtype="Invalid"))

    def check_bigml_auth(self):
        return self.bigml.list_projects("limit=1")["code"] == 200

    def export_to_bigml(self, path, name, as_dataset=False):
        source = self.bigml.create_source(path, {"name": name})
        if not as_dataset:
            return source
        return self.bigml.create_dataset(source, {"name": name})
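A hedged usage sketch for this mixin (MySpider, its name, and the items.csv path are hypothetical; only get_bigml_api and export_to_bigml come from the class above):
import scrapy

class MySpider(BigMLAPIMixIn, scrapy.Spider):
    name = "my_spider"

    def closed(self, reason):
        self.get_bigml_api()  # credentials from settings or environment
        # upload previously exported items and turn them into a dataset
        self.export_to_bigml("items.csv", "scraped items", as_dataset=True)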
Example 3: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_source [as alias]
from bigml.api import BigML
api = BigML()
source1 = api.create_source("iris.csv")
api.ok(source1)
dataset1 = api.create_dataset(source1, \
{'name': u'iris dataset'})
api.ok(dataset1)
anomaly1 = api.create_anomaly(dataset1, \
{'name': u"iris dataset's anomaly detector"})
api.ok(anomaly1)
batchanomalyscore1 = api.create_batch_anomaly_score(anomaly1, dataset1, \
{'name': u"Batch Anomaly Score of iris dataset's anomaly detector with iris dataset",
'output_dataset': True})
api.ok(batchanomalyscore1)
dataset2 = api.get_dataset(batchanomalyscore1['object']['output_dataset_resource'])
api.ok(dataset2)
dataset2 = api.update_dataset(dataset2, \
{'fields': {u'000000': {'name': u'score'}},
'name': u'my_dataset_from_batch_anomaly_score_name'})
api.ok(dataset2)
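If you want the renamed score dataset locally, the bindings can export a dataset to CSV; a short sketch continuing from dataset2 above (the file name is arbitrary):
api.download_dataset(dataset2, filename='batch_anomaly_scores.csv')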
Example 4: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_source [as alias]
from bigml.api import BigML
api = BigML()
source1 = api.create_source("iris_sp_chars.csv", \
{'name': 'my_sóurcè_sp_name'})
api.ok(source1)
source1 = api.update_source(source1, \
{'fields': {'000000': {'name': 'sépal.length', 'optype': 'numeric'},
'000001': {'name': 'sépal&width', 'optype': 'numeric'},
'000002': {'name': 'pétal.length', 'optype': 'numeric'},
'000003': {'name': 'pétal&width\x00', 'optype': 'numeric'},
'000004': {'name': 'spécies', 'optype': 'categorical'}}})
api.ok(source1)
Example 5: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_source [as alias]
from bigml.api import BigML
api = BigML()
source1_file = "iris.csv"
args = \
{'fields': {'000000': {'name': 'sepal length', 'optype': 'numeric'},
'000001': {'name': 'sepal width', 'optype': 'numeric'},
'000002': {'name': 'petal length', 'optype': 'numeric'},
'000003': {'name': 'petal width', 'optype': 'numeric'},
'000004': {'name': 'species',
'optype': 'categorical',
'term_analysis': {'enabled': True}}},
}
source2 = api.create_source(source1_file, args)
api.ok(source2)
args = \
{'objective_field': {'id': '000004'},
}
dataset1 = api.create_dataset(source2, args)
api.ok(dataset1)
args = \
{'all_fields': False,
'new_fields': [{'field': '(all-but "000001")',
'names': ['sepal length',
'petal length',
'petal width',
'species']},
{'field': '2', 'names': ['new']}],
'objective_field': {'id': '000004'},
}
dataset2 = api.create_dataset(dataset1, args)
api.ok(dataset2)
Example 6: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_source [as alias]
# <codecell>
import numpy as np
import pandas as pd
from bigml.api import BigML
# <codecell>
# Create a BigML instance
api = BigML()
# <codecell>
# Create source instance with train dataset
train_source = api.create_source('train.csv')
# <codecell>
# Create a BigML dataset from source instance
train_dataset = api.create_dataset(train_source)
# <codecell>
# Fit an ensemble to the dataset
model = api.create_ensemble(train_dataset)
# <codecell>
# Read the test dataset
test_X = pd.read_csv('test.csv')
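The snippet stops after loading the test data; one plausible way to finish it is a remote batch prediction rather than local scoring. Everything below is an assumption layered on the names above, not part of the original notebook:
# <codecell>
# Create a source/dataset for the test data and score it with the ensemble
test_source = api.create_source('test.csv')
test_dataset = api.create_dataset(test_source)
batch_prediction = api.create_batch_prediction(model, test_dataset,
                                               {'all_fields': True, 'header': True})
api.ok(batch_prediction)
api.download_batch_prediction(batch_prediction, filename='predictions.csv')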
Example 7: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_source [as alias]
#@see: http://bigml.readthedocs.org/en/latest/#local-predictions
from bigml.api import BigML
api = BigML('smarkit',"37b903bf765414b5e1c3164061cee5fa57e7e6ad",storage='./storage')
source = api.create_source('./data/red_bule_balls_2003.csv')
api.pprint(api.get_fields(source))
dataset = api.create_dataset(source)
model = api.create_model(dataset)
prediction = api.create_prediction(model, {'red':[1,2,3,4,5,6],'blue':7})
#prediction
api.pprint(prediction)
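The @see link above refers to BigML's local predictions; a hedged sketch of that variant using the model just created (input values are only an illustration):
from bigml.model import Model

local_model = Model(model, api=api)  # caches the model locally, here under ./storage
local_prediction = local_model.predict({'red': [1, 2, 3, 4, 5, 6], 'blue': 7})
print(local_prediction)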
Example 8: BigMLTester
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_source [as alias]
import logging
from os.path import basename

from bigml.api import BigML, check_resource


class BigMLTester(ForestTester):  # ForestTester is defined elsewhere in this project
    api = None
    authenticated = False
    source_res = None
    ensemble_res = None
    logger = None
    train_time = -1
    predict_time = -1
    results = None
    test_data = None

    def __init__(self, *args, **kwargs):
        print(args)
        print(kwargs)
        bigml_user = kwargs.get('bigml_user', None)
        bigml_key = kwargs.get('bigml_key', None)
        ForestTester.__init__(self, *args, **kwargs)
        self.authenticate(bigml_user, bigml_key)
        self.logger = logging.getLogger(__name__)
        self.logger.addHandler(logging.FileHandler('BigMLTester.log'))
        self.logger.setLevel(logging.DEBUG)

    def authenticate(self, bigml_user, bigml_key):
        """
        Initialize the BigML API and run a short test to check authentication.
        """
        self.api = BigML(username=bigml_user, api_key=bigml_key)
        result = self.api.list_sources()
        self.authenticated = (result['code'] == 200)

    def upload_source(self, filename):
        """
        Upload a source file to BigML. Return its resource value.
        """
        assert self.authenticated, 'Not authenticated!'
        # check if the source file has already been uploaded
        query_string = 'name={}'.format(filename)
        matching_sources = self.api.list_sources(query_string)['objects']
        if len(matching_sources) > 0:
            source = matching_sources[0]
            self.logger.info('{0} is already present in BigML'.format(basename(filename)))
        else:
            self.logger.info('uploading source to BigML...')
            source = self.api.create_source(filename, {'name': filename})
        # enter polling loop until the source becomes ready
        check_resource(source['resource'], self.api.get_source)
        return source['resource']

    def make_dataset(self, source_res):
        """
        Create a BigML dataset from the given source resource. Return the
        dataset resource value.
        """
        assert self.authenticated, 'Not authenticated!'
        # check if a dataset has already been created
        query_string = 'source={}'.format(source_res)
        matching_datasets = self.api.list_datasets(query_string)['objects']
        if len(matching_datasets) > 0:
            dataset = matching_datasets[0]
            self.logger.info('A dataset already exists for this source')
        else:
            filename = self.api.get_source(source_res)['object']['file_name']
            datasetname = "{0}'s dataset".format(filename)
            dataset = self.api.create_dataset(source_res, {'name': datasetname})
        # enter polling loop until the dataset becomes ready
        check_resource(dataset['resource'], self.api.get_dataset)
        return dataset['resource']

    def train_ensemble(self, train_data):
        assert self.authenticated, 'Not authenticated!'
        ensemble_args = {'number_of_models': self.n_trees,
                         'sample_rate': self.sample_rate,
                         'randomize': self.randomize,
                         'replacement': self.bootstrap,
                         'tlp': 5}
        ensemble = self.api.create_ensemble(train_data, ensemble_args)
        self.ensemble_res = ensemble['resource']
        # enter polling loop until the ensemble becomes ready
        ensemble = check_resource(self.ensemble_res, self.api.get_ensemble)
        self.logger.info('Ensemble is ready')
        self.train_time = ensemble['object']['status']['elapsed'] / 1000

    def test_ensemble(self, test_file):
        assert self.authenticated, 'Not authenticated!'
        # download a local copy of the ensemble
        self.logger.info('Creating local ensemble')
        #......... remainder of this method omitted .........
Example 9: main
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_source [as alias]
def main(args):
    print('initialize BigML API')
    if args.username and args.apikey:
        api = BigML(args.username, args.apikey)
    else:
        api = BigML()
    print('generate cross validation splits')
    cv_files = generate_cross_validation(args.filename, args.nfolds)
    cv_datasets = []
    params = {'tags': [args.tag]}
    if args.objective_field >= 0:
        params['objective_field'] = {'id': '%06x' % args.objective_field}
    for (train_file, test_file) in cv_files:
        if args.sequential:
            # wait for the source before creating each dataset
            train_source = api.create_source(train_file, params)
            train_dataset = api.create_dataset(train_source, params)
            if api.ok(train_dataset):
                test_source = api.create_source(test_file, params)
                test_dataset = api.create_dataset(test_source, params)
        else:
            # upload sources in parallel and create datasets in parallel
            train_source = api.create_source(train_file, params)
            test_source = api.create_source(test_file, params)
            train_dataset = api.create_dataset(train_source, params)
            test_dataset = api.create_dataset(test_source, params)
        cv_datasets.append((train_dataset, test_dataset))
    # don't pass the objective field to the models
    del params['objective_field']
    # wait for dataset creation to finish so we can find out the number of features
    dataset_res = api.check_resource(cv_datasets[0][0], api.get_dataset)
    dataset_obj = dataset_res['object']
    # initial feature set (list() so the id list can be mutated under Python 3)
    field_ids = list(dataset_obj['fields'].keys())
    field_ids.remove(dataset_obj['objective_field']['id'])
    initial_state = [False for _ in field_ids]
    # do best-first search
    done = False
    open_list = [(initial_state, 0)]
    closed_list = []
    best_accuracy = -1
    best_unchanged_count = 0
    while not done:
        (v, fv) = find_max_state(open_list)
        v_ids = [field_ids[i] for (i, val) in enumerate(v) if val]
        print('Max state is: %s\n Accuracy = %f' % (v_ids, fv))
        closed_list.append((v, fv))
        open_list.remove((v, fv))
        if (fv - EPSILON) > best_accuracy:
            best_state = v
            best_accuracy = fv
            best_unchanged_count = 0
            print('new best state')
        else:
            best_unchanged_count += 1
        children = expand_state(v)
        for c in children:
            if (c not in [pair[0] for pair in open_list]
                    and c not in [pair[0] for pair in closed_list]):
                input_fields = [id for (i, id) in enumerate(field_ids) if c[i]]
                print('Evaluating %s' % input_fields)
                params['input_fields'] = input_fields
                val = evaluate(cv_datasets, params, api, args.penalty, args.sequential)
                open_list.append((c, val))
        if best_unchanged_count >= args.staleness:
            done = True
    best_features = [field_ids[i] for (i, val) in enumerate(best_state) if val]
    print('The best feature subset is: %s \n Accuracy = %0.2f%%' % (best_features, best_accuracy * 100))
    print('Evaluated %d/%d feature subsets' % ((len(open_list) + len(closed_list)), 2 ** len(field_ids)))
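find_max_state and expand_state are not included in this listing; minimal sketches consistent with how the search loop calls them (assumptions, not the original helpers):
def find_max_state(open_list):
    # the (state, accuracy) pair with the highest accuracy
    return max(open_list, key=lambda pair: pair[1])

def expand_state(parent):
    # children differ from the parent by exactly one newly enabled feature
    children = []
    for i in range(len(parent)):
        if not parent[i]:
            child = parent[:]
            child[i] = True
            children.append(child)
    return children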
Example 10: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_source [as alias]
from bigml.api import BigML
api = BigML()
source1 = api.create_source("iris_sp_chars.csv", \
{'name': u'my_s\xf3urc\xe8_sp_name'})
api.ok(source1)
source1 = api.update_source(source1, \
{'fields': {u'000000': {'name': u's\xe9pal.length', 'optype': u'numeric'},
u'000001': {'name': u's\xe9pal&width', 'optype': u'numeric'},
u'000002': {'name': u'p\xe9tal.length', 'optype': u'numeric'},
u'000003': {'name': u'p\xe9tal&width\x00', 'optype': u'numeric'},
u'000004': {'name': u'sp\xe9cies', 'optype': u'categorical'}}})
api.ok(source1)
Example 11: main
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_source [as alias]
import sys
import argparse

from bigml.api import BigML
from bigml.fields import Fields

# log, new_fields, excluded_fields, train_test_split, share_dataset,
# share_model and share_evaluation are helpers defined elsewhere in this script


def main(args=sys.argv[1:]):
    """Parses command-line parameters and calls the actual main function.
    """
    parser = argparse.ArgumentParser(
        description="Dataset analysis",
        epilog="BigML, Inc")
    # source with activity data
    parser.add_argument('--source',
                        action='store',
                        dest='source',
                        default=None,
                        help="Full path to file")
    # create private links or not
    parser.add_argument('--share',
                        action='store_true',
                        default=False,
                        help="Share created resources or not")
    # weight models or not
    parser.add_argument('--balance',
                        action='store_true',
                        default=False,
                        help="Weight model or not")
    args = parser.parse_args(args)
    if not args.source:
        sys.exit("You need to provide a valid path to a source")
    api = BigML()
    name = "Sean's activity"
    log("Creating source...")
    source_args = {'name': name}
    source = api.create_source(args.source, source_args)
    if not api.ok(source):
        sys.exit("Source isn't ready...")
    log("Creating dataset...")
    dataset = api.create_dataset(source)
    if not api.ok(dataset):
        sys.exit("Dataset isn't ready...")
    log("Transforming dataset...")
    # Extends the dataset with new fields for previous activity, previous
    # duration, start day, and start hour. Removes the first column and the
    # start and end fields.
    new_dataset_args = {
        'name': name,
        'new_fields': new_fields(),
        'all_but': excluded_fields()}
    new_dataset = api.create_dataset(dataset, new_dataset_args)
    if not api.ok(new_dataset):
        sys.exit("Dataset isn't ready...")
    # Set the objective field to activity
    fields = Fields(new_dataset['object']['fields'])
    objective_id = fields.field_id('activity')
    new_dataset_args = {
        'objective_field': {'id': objective_id}}
    new_dataset = api.update_dataset(new_dataset, new_dataset_args)
    # Create training and test sets for evaluation
    log("Splitting dataset...")
    training, test = train_test_split(api, new_dataset)
    log("Creating a model using the training dataset...")
    model_args = {
        'objective_field': objective_id,
        'balance_objective': args.balance,
        'name': training['object']['name']}
    model = api.create_model(training, model_args)
    if not api.ok(model):
        sys.exit("Model isn't ready...")
    # Create an evaluation
    log("Evaluating model against the test dataset...")
    eval_args = {
        'name': name + ' - 80% vs 20%'}
    evaluation = api.create_evaluation(model, test, eval_args)
    if not api.ok(evaluation):
        sys.exit("Evaluation isn't ready...")
    log("Creating model for the full dataset...")
    model = api.create_model(new_dataset, model_args)
    if not api.ok(model):
        sys.exit("Model isn't ready...")
    # Create private links
    if args.share:
        log("Sharing resources...")
        dataset_private_link = share_dataset(api, new_dataset)
        model_private_link = share_model(api, model)
        evaluation_private_link = share_evaluation(api, evaluation)
        log(dataset_private_link)
        log(model_private_link)
        log(evaluation_private_link)
Example 12: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_source [as alias]
from bigml.api import BigML

if __name__ == "__main__":
    print("test")
    api = BigML("onidzelskyi", "a5b11ebe462ad583478cf40daf17e92060dc5915", dev_mode=True)
    source = api.create_source("./data/iris.csv")
    dataset = api.create_dataset(source)
    model = api.create_model(dataset)
    prediction = api.create_prediction(model, {"sepal length": 5, "sepal width": 2.5})
    api.pprint(prediction)
Example 13: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_source [as alias]
import csv
import sys
import time

from bigml.api import BigML

api = BigML(dev_mode=True)
# get args
train_csv = sys.argv[1]
test_csv = sys.argv[2]
# train model
source_train = api.create_source(train_csv)
dataset_train = api.create_dataset(source_train)
model = api.create_model(dataset_train)
# test model
with open(test_csv, 'r', newline='') as csv_test_file:
    test_csv_reader = csv.reader(csv_test_file, delimiter=',', quotechar='"')
    for row in test_csv_reader:
        row.pop()  # drop the trailing column (the label)
        row = dict(zip(range(0, len(row)), row))
        prediction = api.create_prediction(model, row)
        api.pprint(prediction)
Example 14: main
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_source [as alias]
import sys
import os
import glob
import argparse

from bigml.api import BigML

# log, new_fields, training_test_split and share_resource are helpers
# defined elsewhere in this script


def main(args=sys.argv[1:]):
    """Parses command-line parameters and calls the actual main function.
    """
    parser = argparse.ArgumentParser(description="Market sentiment analysis", epilog="BigML, Inc")
    # directory with the csv data files
    parser.add_argument("--data", action="store", dest="data", default="data", help="Full path to data with csv files")
    # create private links or not
    parser.add_argument("--share", action="store_true", default=True, help="Share created resources or not")
    args = parser.parse_args(args)
    if not args.data:
        sys.exit("You need to provide a valid path to a data directory")
    api = BigML()
    name = "UpOrDown?"
    log("Creating sources...")
    csvs = glob.glob(os.path.join(args.data, "*.csv"))
    sources = []
    for csv_file in csvs:
        source = api.create_source(csv_file)
        api.ok(source)
        sources.append(source)
    log("Creating datasets...")
    datasets = []
    for source in sources:
        dataset = api.create_dataset(source)
        api.ok(dataset)
        datasets.append(dataset)
    new_datasets = []
    for dataset in datasets:
        new_dataset = api.create_dataset(dataset, {"new_fields": new_fields(), "all_fields": False})
        new_datasets.append(new_dataset)
    log("Merging datasets...")
    multi_dataset = api.create_dataset(new_datasets, {"name": name})
    api.ok(multi_dataset)
    # Create training and test sets for evaluation
    log("Splitting dataset...")
    training, test = training_test_split(api, multi_dataset)
    log("Creating a model using the training dataset...")
    model = api.create_model(training, {"name": name + " (80%)"})
    api.ok(model)
    # Create an evaluation
    log("Evaluating model against the test dataset...")
    eval_args = {"name": name + " - Single model: 80% vs 20%"}
    evaluation_model = api.create_evaluation(model, test, eval_args)
    api.ok(evaluation_model)
    log("Creating an ensemble using the training dataset...")
    ensemble = api.create_ensemble(training, {"name": name})
    api.ok(ensemble)
    # Create an evaluation
    log("Evaluating ensemble against the test dataset...")
    eval_args = {"name": name + " - Ensemble: 80% vs 20%"}
    evaluation_ensemble = api.create_evaluation(ensemble, test, eval_args)
    api.ok(evaluation_ensemble)
    log("Creating model for the full dataset...")
    model = api.create_model(multi_dataset, {"name": name})
    api.ok(model)
    # Create private links
    if args.share:
        log("Sharing resources...")
        dataset_link = share_resource(api, multi_dataset)
        model_link = share_resource(api, model)
        evaluation_model_link = share_resource(api, evaluation_model)
        evaluation_ensemble_link = share_resource(api, evaluation_ensemble)
        log(dataset_link)
        log(model_link)
        log(evaluation_model_link)
        log(evaluation_ensemble_link)
Example 15: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_source [as alias]
from bigml.api import BigML
api = BigML()
source1 = api.create_source("iris.csv", \
{'name': u'my_source_name'})
api.ok(source1)