This page collects typical usage examples of the Python method bigml.api.BigML.create_dataset. If you are unsure what BigML.create_dataset does or how to use it, the curated code samples below may help. You can also explore further usage examples of its containing class, bigml.api.BigML.
The following 15 code examples of BigML.create_dataset are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python samples.
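All the examples below share the same basic pattern: upload a source, then derive a dataset from it. A minimal sketch of that pattern (assuming BIGML_USERNAME and BIGML_API_KEY are set in the environment and an iris.csv file exists locally):
from bigml.api import BigML

api = BigML()                            # credentials read from the environment
source = api.create_source("iris.csv")   # upload the raw CSV as a source
api.ok(source)                           # block until the source is ready
dataset = api.create_dataset(source)     # derive a dataset from the source
api.ok(dataset)                          # block until the dataset is ready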
Example 1: bigml
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_dataset [as alias]
import time                                 # used by the polling loop below
from timeit import default_timer as timer  # presumed source of timer()

def bigml(train_csv, test_csv, result_csv):
    api = BigML(dev_mode=True)
    # train model
    start_training = timer()
    source_train = api.create_source(train_csv)
    dataset_train = api.create_dataset(source_train)
    model = api.create_model(dataset_train)
    end_training = timer()
    print('Training model.')
    print('Training took %i seconds.' % (end_training - start_training))
    # test the model with a batch prediction
    start_test = timer()
    source_test = api.create_source(test_csv)
    dataset_test = api.create_dataset(source_test)
    batch_prediction = api.create_batch_prediction(
        model,
        dataset_test,
        {
            "name": "census prediction",
            "all_fields": True,
            "header": False,
            "confidence": False
        }
    )
    # wait until batch processing is finished
    while True:
        progress = api.get_batch_prediction(batch_prediction)['object']['status']['progress']
        print(progress)
        if progress == 1:
            break
        time.sleep(1)
    end_test = timer()
    print('Testing took %i seconds' % (end_test - start_test))
    api.download_batch_prediction(batch_prediction['resource'], filename=result_csv)
    # cleanup
    api.delete_source(source_train)
    api.delete_source(source_test)
    api.delete_dataset(dataset_train)
    api.delete_dataset(dataset_test)
    api.delete_model(model)
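The manual progress-polling loop above works, but the bindings ship a helper for exactly this: api.ok blocks until a resource reaches its finished state. A hedged alternative for the waiting step, using the same variables:
# let the bindings poll for completion instead of sleeping manually
api.ok(batch_prediction)
api.download_batch_prediction(batch_prediction['resource'], filename=result_csv)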
Example 2: BigMLAPIMixIn
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_dataset [as alias]
from scrapy.exceptions import NotConfigured  # source of the exception used below

class BigMLAPIMixIn(object):

    BIGML_AUTH_ERRMSG = (
        "{errtype:s} BigML credentials. Please supply "
        "BIGML_USERNAME and BIGML_API_KEY as either Scrapy "
        "settings or environment variables."
    )

    # XXX: This should get a method to read BigML configuration from settings
    def get_bigml_api(self, *args, **kwargs):
        try:
            self.bigml = BigML(*args, **kwargs)
        except AttributeError:
            raise NotConfigured(self.BIGML_AUTH_ERRMSG.format(errtype="Missing"))
        if not self.check_bigml_auth():
            raise NotConfigured(self.BIGML_AUTH_ERRMSG.format(errtype="Invalid"))

    def check_bigml_auth(self):
        return self.bigml.list_projects("limit=1")["code"] == 200

    def export_to_bigml(self, path, name, as_dataset=False):
        # upload `path` (the original erroneously passed the built-in `file`)
        source = self.bigml.create_source(path, {"name": name})
        if not as_dataset:
            return source
        return self.bigml.create_dataset(source, {"name": name})
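A hypothetical usage sketch of the mixin (the pipeline class and file name are illustrative, not part of the original):
# hypothetical Scrapy pipeline built on BigMLAPIMixIn
class ItemExportPipeline(BigMLAPIMixIn):
    def close_spider(self, spider):
        self.get_bigml_api()  # credentials from Scrapy settings or environment
        # upload the scraped items and materialize them as a dataset
        self.export_to_bigml("items.csv", "scraped items", as_dataset=True)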
Example 3: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_dataset [as alias]
from bigml.api import BigML

api = BigML()
source1_file = "iris.csv"
args = \
{u'fields': {u'000000': {u'name': u'sepal length', u'optype': u'numeric'},
u'000001': {u'name': u'sepal width', u'optype': u'numeric'},
u'000002': {u'name': u'petal length', u'optype': u'numeric'},
u'000003': {u'name': u'petal width', u'optype': u'numeric'},
u'000004': {u'name': u'species',
u'optype': u'categorical',
u'term_analysis': {u'enabled': True}}}}
source2 = api.create_source(source1_file, args)
api.ok(source2)
args = \
{u'objective_field': {u'id': u'000004'}}
dataset1 = api.create_dataset(source2, args)
api.ok(dataset1)
args = \
{u'cluster_seed': u'bigml', u'critical_value': 5}
cluster1 = api.create_cluster(dataset1, args)
api.ok(cluster1)
args = \
{u'fields_map': {u'000000': u'000000',
u'000001': u'000001',
u'000002': u'000002',
u'000003': u'000003',
u'000004': u'000004'},
u'output_dataset': True}
batchcentroid1 = api.create_batch_centroid(cluster1, dataset1, args)
Example 4: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_dataset [as alias]
from bigml.api import BigML
api = BigML()
source1 = api.create_source("iris.csv")
api.ok(source1)
dataset1 = api.create_dataset(source1, \
{'name': u'my_dataset_name'})
api.ok(dataset1)
Example 5: main
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_dataset [as alias]
# assumes: import sys, os, glob, argparse -- and that log, new_fields,
# training_test_split and share_resource are defined elsewhere in the script
def main(args=sys.argv[1:]):
    """Parses command-line parameters and calls the actual main function."""
    parser = argparse.ArgumentParser(description="Market sentiment analysis",
                                     epilog="BigML, Inc")
    # source with activity data
    parser.add_argument("--data", action="store", dest="data", default="data",
                        help="Full path to data with csv files")
    # create private links or not (default must be False for the flag to matter)
    parser.add_argument("--share", action="store_true", default=False,
                        help="Share created resources or not")
    args = parser.parse_args(args)
    if not args.data:
        sys.exit("You need to provide a valid path to a data directory")
    api = BigML()
    name = "UpOrDown?"
    log("Creating sources...")
    csvs = glob.glob(os.path.join(args.data, "*.csv"))
    sources = []
    for csv in csvs:
        source = api.create_source(csv)
        api.ok(source)
        sources.append(source)
    log("Creating datasets...")
    datasets = []
    for source in sources:
        dataset = api.create_dataset(source)
        api.ok(dataset)
        datasets.append(dataset)
    # derive per-source datasets that keep only the newly computed fields
    new_datasets = []
    for dataset in datasets:
        new_dataset = api.create_dataset(dataset, {"new_fields": new_fields(),
                                                   "all_fields": False})
        new_datasets.append(new_dataset)
    log("Merging datasets...")
    multi_dataset = api.create_dataset(new_datasets, {"name": name})
    api.ok(multi_dataset)
    # Create training and test set for evaluation
    log("Splitting dataset...")
    training, test = training_test_split(api, multi_dataset)
    log("Creating a model using the training dataset...")
    model = api.create_model(training, {"name": name + " (80%)"})
    api.ok(model)
    # Creating an evaluation
    log("Evaluating model against the test dataset...")
    eval_args = {"name": name + " - Single model: 80% vs 20%"}
    evaluation_model = api.create_evaluation(model, test, eval_args)
    api.ok(evaluation_model)
    log("Creating an ensemble using the training dataset...")
    ensemble = api.create_ensemble(training, {"name": name})
    api.ok(ensemble)
    # Creating an evaluation
    log("Evaluating ensemble against the test dataset...")
    eval_args = {"name": name + " - Ensemble: 80% vs 20%"}
    evaluation_ensemble = api.create_evaluation(ensemble, test, eval_args)
    api.ok(evaluation_ensemble)
    log("Creating model for the full dataset...")
    model = api.create_model(multi_dataset, {"name": name})
    api.ok(model)
    # Create private links
    if args.share:
        log("Sharing resources...")
        dataset_link = share_resource(api, multi_dataset)
        model_link = share_resource(api, model)
        evaluation_model_link = share_resource(api, evaluation_model)
        evaluation_ensemble_link = share_resource(api, evaluation_ensemble)
        log(dataset_link)
        log(model_link)
        log(evaluation_model_link)
        log(evaluation_ensemble_link)
Example 6: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_dataset [as alias]
from bigml.api import BigML

api = BigML()
source1_file = "iris.csv"
args = \
{u'fields': {u'000000': {u'name': u'sepal length', u'optype': u'numeric'},
u'000001': {u'name': u'sepal width', u'optype': u'numeric'},
u'000002': {u'name': u'petal length', u'optype': u'numeric'},
u'000003': {u'name': u'petal width', u'optype': u'numeric'},
u'000004': {u'name': u'species',
u'optype': u'categorical',
u'term_analysis': {u'enabled': True}}}}
source2 = api.create_source(source1_file, args)
api.ok(source2)
args = \
{u'objective_field': {u'id': u'000004'}}
dataset1 = api.create_dataset(source2, args)
api.ok(dataset1)
args = \
{u'anomaly_seed': u'bigml', u'seed': u'bigml'}
anomaly1 = api.create_anomaly(dataset1, args)
api.ok(anomaly1)
args = \
{u'fields_map': {u'000000': u'000000',
u'000001': u'000001',
u'000002': u'000002',
u'000003': u'000003',
u'000004': u'000004'},
u'output_dataset': True}
batchanomalyscore1 = api.create_batch_anomaly_score(anomaly1, dataset1, args)
Example 7: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_dataset [as alias]
from bigml.api import BigML
import pandas as pd  # needed for the CSV reads below
# <codecell>
# Create a BigML instance
api = BigML()
# <codecell>
# Create source instance with train dataset
train_source = api.create_source('train.csv')
# <codecell>
# Create a BigML dataset from source instance
train_dataset = api.create_dataset(train_source)
# <codecell>
# Fit a model to the dataset
model = api.create_ensemble(train_dataset)
# <codecell>
# Read the test dataset
test_X = pd.read_csv('test.csv')
test_y = pd.read_csv('test_target.csv')
test_set = test_X.T.to_dict().values()
# <codecell>
Example 8: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_dataset [as alias]
from bigml.api import BigML
api = BigML()
source1 = api.create_source("iris.csv")
api.ok(source1)
dataset1 = api.create_dataset(source1)
api.ok(dataset1)
dataset2 = api.create_dataset(dataset1, \
{'name': u"iris' dataset - sample (30.00%)",
'out_of_bag': True,
'sample_rate': 0.7})
api.ok(dataset2)
dataset3 = api.create_dataset(dataset1, \
{'name': u"iris' dataset - sample (70.00%)", 'sample_rate': 0.7})
api.ok(dataset3)
model1 = api.create_model(dataset3)
api.ok(model1)
evaluation1 = api.create_evaluation(model1, dataset2, \
{'name': u'my_evaluation_name'})
api.ok(evaluation1)
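Note the split trick in this example: 'sample_rate': 0.7 draws a 70% sample for training, while the same rate with 'out_of_bag': True selects the complementary 30% (provided the same sampling seed is used), yielding disjoint training and test datasets for the evaluation.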
Example 9: main
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_dataset [as alias]
# assumes: import sys, argparse; Fields from bigml.fields; and helpers log,
# new_fields, excluded_fields, train_test_split, share_dataset, share_model
# and share_evaluation defined elsewhere in the script
def main(args=sys.argv[1:]):
    """Parses command-line parameters and calls the actual main function."""
    parser = argparse.ArgumentParser(
        description="Dataset analysis",
        epilog="BigML, Inc")
    # source with activity data
    parser.add_argument('--source',
                        action='store',
                        dest='source',
                        default=None,
                        help="Full path to file")
    # create private links or not
    parser.add_argument('--share',
                        action='store_true',
                        default=False,
                        help="Share created resources or not")
    # weight models or not
    parser.add_argument('--balance',
                        action='store_true',
                        default=False,
                        help="Weight model or not")
    args = parser.parse_args(args)
    if not args.source:
        sys.exit("You need to provide a valid path to a source")
    api = BigML()
    name = "Sean's activity"
    log("Creating source...")
    source_args = {'name': name}
    source = api.create_source(args.source, source_args)
    if not api.ok(source):
        sys.exit("Source isn't ready...")
    log("Creating dataset...")
    dataset = api.create_dataset(source)
    if not api.ok(dataset):
        sys.exit("Dataset isn't ready...")
    log("Transforming dataset...")
    # Extends the dataset with new fields for previous activity, previous
    # duration, start day, and start hour. Removes the first column and the
    # start and end fields.
    new_dataset_args = {
        'name': name,
        'new_fields': new_fields(),
        'all_but': excluded_fields()}
    new_dataset = api.create_dataset(dataset, new_dataset_args)
    if not api.ok(new_dataset):
        sys.exit("Dataset isn't ready...")
    # Set objective field to activity
    fields = Fields(new_dataset['object']['fields'])
    objective_id = fields.field_id('activity')
    new_dataset_args = {
        'objective_field': {'id': objective_id}}
    new_dataset = api.update_dataset(new_dataset, new_dataset_args)
    # Create training and test set for evaluation
    log("Splitting dataset...")
    training, test = train_test_split(api, new_dataset)
    log("Creating a model using the training dataset...")
    model_args = {
        'objective_field': objective_id,
        'balance_objective': args.balance,
        'name': training['object']['name']}
    model = api.create_model(training, model_args)
    if not api.ok(model):
        sys.exit("Model isn't ready...")
    # Creating an evaluation
    log("Evaluating model against the test dataset...")
    eval_args = {
        'name': name + ' - 80% vs 20%'}
    evaluation = api.create_evaluation(model, test, eval_args)
    if not api.ok(evaluation):
        sys.exit("Evaluation isn't ready...")
    log("Creating model for the full dataset...")
    model = api.create_model(new_dataset, model_args)
    if not api.ok(model):
        sys.exit("Model isn't ready...")
    # Create private links
    if args.share:
        log("Sharing resources...")
        dataset_private_link = share_dataset(api, new_dataset)
        model_private_link = share_model(api, model)
        evaluation_private_link = share_evaluation(api, evaluation)
        log(dataset_private_link)
        log(model_private_link)
        log(evaluation_private_link)
Example 10: Cluster
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_dataset [as alias]
#......... part of this example's code is omitted .........
        # Checks and cleans input_data leaving the fields used in the model
        reference_point, _ = self._prepare_for_distance( \
            reference_point, by_name=by_name)
        # mimic centroid structure to use it in distance computation
        point_info = {"center": reference_point}
        reference = Centroid(point_info)
        distances = []
        for point in list_of_points:
            centroid_id = None
            if isinstance(point, Centroid):
                centroid_id = point.centroid_id
                point = point.center
                by_name = False
            clean_point, unique_terms = self._prepare_for_distance( \
                point, by_name=by_name)
            if clean_point != reference_point:
                result = {"data": point, "distance": reference.distance2( \
                    clean_point, unique_terms, self.scales)}
                if centroid_id is not None:
                    result.update({"centroid_id": centroid_id})
                distances.append(result)
        return distances

    def points_in_cluster(self, centroid_id):
        """Returns the list of data points that fall in one cluster.

        """
        cluster_datasets = self.datasets
        centroid_dataset = cluster_datasets.get(centroid_id)
        if self.api is None:
            self.api = BigML(storage=STORAGE)
        if centroid_dataset in [None, ""]:
            centroid_dataset = self.api.create_dataset( \
                self.resource_id, {"centroid": centroid_id})
            self.api.ok(centroid_dataset)
        else:
            centroid_dataset = self.api.check_resource( \
                "dataset/%s" % centroid_dataset)
        # download dataset to compute local predictions
        downloaded_data = self.api.download_dataset( \
            centroid_dataset["resource"])
        if PY3:
            text_reader = codecs.getreader("utf-8")
            downloaded_data = text_reader(downloaded_data)
        reader = csv.DictReader(downloaded_data)
        points = []
        for row in reader:
            points.append(row)
        return points

    def closest_in_cluster(self, reference_point,
                           number_of_points=None,
                           centroid_id=None,
                           by_name=True):
        """Computes the list of data points closer to a reference point.

        If no centroid_id information is provided, the points are chosen
        from the same cluster as the reference point.
        The points are returned in a list, sorted according
        to their distance to the reference point. The number_of_points
        parameter can be set to truncate the list to a maximum number of
        results. The response is a dictionary that contains the
        centroid id of the cluster plus the list of points.
        """
        if centroid_id is not None and centroid_id not in \
                [centroid.centroid_id for centroid in self.centroids]:
Example 11: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_dataset [as alias]
from bigml.api import BigML
api = BigML()
source1 = api.create_source("iris.csv")
api.ok(source1)
dataset1 = api.create_dataset(source1)
api.ok(dataset1)
cluster1 = api.create_cluster(dataset1)
api.ok(cluster1)
batchcentroid1 = api.create_batch_centroid(cluster1, dataset1, \
{'output_dataset': True})
api.ok(batchcentroid1)
# fetch the dataset generated by the batch centroid (cf. Example 14)
dataset2 = api.get_dataset(batchcentroid1['object']['output_dataset_resource'])
api.ok(dataset2)
dataset2 = api.update_dataset(dataset2, \
{'fields': {u'000000': {'name': u'cluster'}}})
api.ok(dataset2)
dataset3 = api.create_dataset(dataset2, \
{'input_fields': [u'000000'],
'name': u'my_dataset_from_dataset_from_batch_centroid_name',
'new_fields': [{'field': u'( integer ( replace ( field "cluster" ) "Cluster " "" ) )',
u'name': u'Cluster'}]})
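The 'field' value in the last step is a Flatline expression, BigML's dataset transformation language: ( integer ( replace ( field "cluster" ) "Cluster " "" ) ) strips the "Cluster " prefix from labels such as "Cluster 1" and casts what remains to an integer.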
Example 12: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_dataset [as alias]
from bigml.api import BigML
api = BigML()
source1 = api.create_source("iris.csv")
api.ok(source1)
dataset1 = api.create_dataset(source1, {"name": u"iris dataset"})
api.ok(dataset1)
cluster1 = api.create_cluster(dataset1, {"name": u"my_cluster_name"})
api.ok(cluster1)
Example 13: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_dataset [as alias]
from bigml.api import BigML
import csv
import sys

api = BigML(dev_mode=True)
# get args
train_csv = sys.argv[1]
test_csv = sys.argv[2]
# train model (use the parsed args instead of the hard-coded census paths)
source_train = api.create_source(train_csv)
dataset_train = api.create_dataset(source_train)  # was create_dataset(dataset_train)
model = api.create_model(dataset_train)           # was create_model(dataset)
# test model
with open(test_csv) as csv_test_file:
    test_csv_reader = csv.reader(csv_test_file, delimiter=',', quotechar='"')
    for row in test_csv_reader:
        row.pop()  # drop the target column
        row = dict(zip(range(0, len(row)), row))
        prediction = api.create_prediction(model, row)
        api.pprint(prediction)
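Creating one prediction per row round-trips to the API for every test instance; for anything beyond small files, the batch prediction approach from Example 1 (create_batch_prediction against a test dataset) is usually the better choice.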
Example 14: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_dataset [as alias]
from bigml.api import BigML
api = BigML()
source1 = api.create_source("iris.csv")
api.ok(source1)
dataset1 = api.create_dataset(source1, \
{'name': 'iris'})
api.ok(dataset1)
cluster1 = api.create_cluster(dataset1, \
{'name': 'iris'})
api.ok(cluster1)
batchcentroid1 = api.create_batch_centroid(cluster1, dataset1, \
{'name': 'iris dataset with iris', 'output_dataset': True})
api.ok(batchcentroid1)
dataset2 = api.get_dataset(batchcentroid1['object']['output_dataset_resource'])
api.ok(dataset2)
dataset2 = api.update_dataset(dataset2, \
{'name': 'iris dataset with iris'})
api.ok(dataset2)
dataset3 = api.create_dataset(dataset2, \
{'name': 'my_dataset_from_dataset_from_batch_centroid_name',
'new_fields': [{'field': '( integer ( replace ( field "cluster" ) '
'"Cluster " "" ) )',
'name': 'Cluster'}],
'objective_field': {'id': '100000'}})
Example 15: BigML
# Required import: from bigml.api import BigML [as alias]
# Or: from bigml.api.BigML import create_dataset [as alias]
#@see: http://bigml.readthedocs.org/en/latest/#local-predictions
from bigml.api import BigML
api = BigML('smarkit',"37b903bf765414b5e1c3164061cee5fa57e7e6ad",storage='./storage')
source = api.create_source('./data/red_bule_balls_2003.csv')
api.pprint(api.get_fields(source))
dataset = api.create_dataset(source)
model = api.create_model(dataset)
prediction = api.create_prediction(model, {'red': [1, 2, 3, 4, 5, 6], 'blue': 7})
# print the prediction
api.pprint(prediction)