

Python datasets.dump_svmlight_file Function Code Examples

This article collects typical usage examples of the sklearn.datasets.dump_svmlight_file function in Python. If you are wondering what dump_svmlight_file does, how to call it, or what real-world usage looks like, the hand-picked code examples below should help.


The following presents 15 code examples of dump_svmlight_file, sorted by popularity by default.
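Before turning to the collected examples, here is a minimal, self-contained sketch of a typical call (a toy illustration, not taken from any of the projects below): it writes a small dense matrix and its labels to a file in svmlight/libsvm format and reads it back with load_svmlight_file. The file name demo.svm and the toy data are placeholders.

import numpy as np
from sklearn.datasets import dump_svmlight_file, load_svmlight_file

# A toy dense feature matrix (3 samples, 4 features) and its labels.
X = np.array([[1.0, 0.0, 2.5, 0.0],
              [0.0, 3.0, 0.0, 1.0],
              [4.0, 0.0, 0.0, 0.0]])
y = np.array([1, -1, 1])

# Write the data in svmlight/libsvm format; zero_based=False makes the
# feature indices start at 1, as most libsvm-compatible tools expect.
dump_svmlight_file(X, y, "demo.svm", zero_based=False)

# Round-trip check: load_svmlight_file returns a sparse CSR matrix.
X2, y2 = load_svmlight_file("demo.svm", zero_based=False)
print(X2.toarray())
print(y2)

Zero-valued features are omitted from the output, which is why the format suits sparse data and why several of the examples below pass sparse matrices directly.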

Example 1: test_dump_concise

def test_dump_concise():
    one = 1
    two = 2.1
    three = 3.01
    exact = 1.000000000000001
    # loses the last decimal place
    almost = 1.0000000000000001
    X = [[one, two, three, exact, almost],
         [1e9, 2e18, 3e27, 0, 0],
         [0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0]]
    y = [one, two, three, exact, almost]
    f = BytesIO()
    dump_svmlight_file(X, y, f)
    f.seek(0)
    # make sure it's using the most concise format possible
    assert_equal(f.readline(),
                 b("1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n"))
    assert_equal(f.readline(), b("2.1 0:1000000000 1:2e+18 2:3e+27\n"))
    assert_equal(f.readline(), b("3.01 \n"))
    assert_equal(f.readline(), b("1.000000000000001 \n"))
    assert_equal(f.readline(), b("1 \n"))
    f.seek(0)
    # make sure it's correct too :)
    X2, y2 = load_svmlight_file(f)
    assert_array_almost_equal(X, X2.toarray())
    assert_array_equal(y, y2)
Author: BrenBarn, Project: scikit-learn, Lines: 28, Source file: test_svmlight_format.py

Example 2: fit

    def fit(self, X, Y):

        self.labels=list(set(Y))
        if len(self.labels) > 2 :
            self.multiclass=True
            #print 'multiclass'
        else:
            self.multiclass=False
            
        self.train_fname =self.base_str +'-svmcmd-train' +  '.dat'
        self.model_fname =self.train_fname + '.model'
        dump_svmlight_file(X,Y,self.train_fname ,zero_based=False)
        if self.multiclass:
            command_line=path_to_train_program+'gtsvm_initialize {0} -f {1} -o {2}  -m 1 '.format(self.param_str, self.train_fname , self.model_fname )
        else:
            command_line=path_to_train_program+'gtsvm_initialize -f {1} -o {2} {0}'.format(self.param_str, self.train_fname , self.model_fname )
        args = shlex.split(command_line)
        p = subprocess.Popen(args)
        p.wait()
        command_line=path_to_train_program+'gtsvm_optimize -i {0} -o {1} -e {2} -n {3}'.format(self.model_fname,self.model_fname,self.tol,self.max_iter)    
        args = shlex.split(command_line)
        p = subprocess.Popen(args,stderr=subprocess.PIPE)
        p.wait()
        opt_err_str=p.stderr.read() ##gtsvm is too buggy
        if len(opt_err_str) < 1: 
            command_line=path_to_train_program+'gtsvm_shrink -i {0}  -o {1}'.format(self.model_fname,self.model_fname)
            args = shlex.split(command_line)
            p = subprocess.Popen(args)
            p.wait()
            self.train_fail=False
        else :
            self.train_fail=True
            
        
        return self
Author: niitsuma, Project: gtsvm, Lines: 35, Source file: gtsvm.py

Example 3: predict

 def predict(self, X):
     if isinstance(X,list):
         self.test_n_sample=len(X)
     else:
         self.test_n_sample=X.shape[0]
     Y=[1]*self.test_n_sample
     self.test_fname =self.base_str +'-svmcmd-test' +  '.dat'
     self.predict_fname =self.base_str +'-svmcmd-predict' +  '.dat'
     dump_svmlight_file(X,Y,self.test_fname ,zero_based=False)
     command_line=path_to_train_program+'gtsvm_classify -f {0}  -i {1} -o {2}'.format(self.test_fname , self.model_fname, self.predict_fname )
     args = shlex.split(command_line)
     p = subprocess.Popen(args)
     p.wait()
     if self.train_fail:
         return [max(self.labels)+1]*self.test_n_sample
     
     if self.multiclass : 
         f = open(self.predict_fname, 'rb')
         self.predicted_weight = map(lambda row: map(float,row), list(csv.reader(f)))
         f.close()
         Y_predict=map(np.argmax, self.predicted_weight)
     else :
         self.predicted_weight = np.loadtxt( self.predict_fname)
         Y_predict=map(int,map(round,self.predicted_weight))
     return Y_predict
Author: niitsuma, Project: gtsvm, Lines: 25, Source file: gtsvm.py

Example 4: generate_weekday_newbuyer_exposure

def generate_weekday_newbuyer_exposure(df):
    """
    Add new-buyer count and exposure count features.
    """
    X = df[['uv_0612_0618', 'uv_weekday', 'uv_weekend', 'no_subsidy_exposure', 'newbuyer_6_18']]
    y = df.uv_0626_0702
    dump_svmlight_file(X, y, './uv_weekday_weekend_newbuyer_exposure_without_outliers.dat')
Author: lujiaying, Project: jiayinglu, Lines: 7, Source file: generate_libsvm_format.py

Example 5: generate_week

def generate_week(df):
    """
    Generate a 1-dimensional feature.
    """
    X = df[['uv_0612_0618']]
    y = df.uv_0626_0702
    dump_svmlight_file(X, y, './uv_week.dat')
Author: lujiaying, Project: jiayinglu, Lines: 7, Source file: generate_libsvm_format.py

Example 6: executa_extracao_n

def executa_extracao_n(base_treino, metodo, n=1):
    inicio = time()    
    
    lista_imagens = arq.busca_arquivos(base_treino, "*.png")
    n_imgs_treino = len(lista_imagens)
    
    for lado in range(8,n+1,4):
        atributos = []    
        rotulos = []     
            
        arq_treino = base_treino + "base_PFTAS_"+str(lado)+"x"+str(lado)+".svm"
        ##  START OF THE ATTRIBUTE EXTRACTION PROCESS
        
        for arq_imagem in lista_imagens: 
            print("Arquivo: " + arq_imagem)
            imagem = mh.imread(arq_imagem) 
            if imagem is not None:
                classe, _ = ex.classe_arquivo(arq_imagem)             
                print("executa_extracao_n - shape imagem:" + str(imagem.shape))
                # Extract the attributes and generate the patch files for the training set
                atrs,rots = extrai_pftas_patches_n(imagem, classe, lado)                            
                atributos += atrs
                rotulos += rots
        
        dump_svmlight_file(atributos, rotulos, arq_treino)
    
    log("Extraidos atributos da base " + base_treino + " utilizando " + metodo + "\n para " + str(n_imgs_treino) + "imagens") 
  
    # Show the execution time
    log(str(time()-inicio) + "EXTRAÇÃO")     
Author: willianfatec, Project: PatchWiser, Lines: 30, Source file: testes.py

Example 7: generate_weekday_weekend

def generate_weekday_weekend(df):
    """
    Generate 3-dimensional features.
    """
    X = df[['uv_0612_0618', 'uv_weekday', 'uv_weekend']]
    y = df.uv_0626_0702
    dump_svmlight_file(X, y, './uv_weekday_weekend.dat')
Author: lujiaying, Project: jiayinglu, Lines: 7, Source file: generate_libsvm_format.py

Example 8: test_dump

def test_dump():
    Xs, y = load_svmlight_file(datafile)
    Xd = Xs.toarray()

    for X in (Xs, Xd):
        for zero_based in (True, False):
            for dtype in [np.float32, np.float64]:
                f = BytesIO()
                # we need to pass a comment to get the version info in;
                # LibSVM doesn't grok comments so they're not put in by
                # default anymore.
                dump_svmlight_file(X.astype(dtype), y, f, comment="test",
                                   zero_based=zero_based)
                f.seek(0)

                comment = f.readline()
                assert_in("scikit-learn %s" % sklearn.__version__, comment)
                comment = f.readline()
                assert_in(["one", "zero"][zero_based] + "-based", comment)

                X2, y2 = load_svmlight_file(f, dtype=dtype,
                                            zero_based=zero_based)
                assert_equal(X2.dtype, dtype)
                if dtype == np.float32:
                    assert_array_almost_equal(
                        # allow a rounding error at the last decimal place
                        Xd.astype(dtype), X2.toarray(), 4)
                else:
                    assert_array_almost_equal(
                        # allow a rounding error at the last decimal place
                        Xd.astype(dtype), X2.toarray(), 15)
                assert_array_equal(y, y2)
Author: yzhy, Project: scikit-learn, Lines: 32, Source file: test_svmlight_format.py

Example 9: save_all_data_in_svmlight_format

 def save_all_data_in_svmlight_format(self,
                                      file_path,
                                      extraction_method,
                                      label_type):
     label_list, feature_vector_list = self.extract_all_data(extraction_method, label_type)
     with open(file_path, 'wb') as f:
         datasets.dump_svmlight_file(feature_vector_list, label_list, f)
Author: t-usui, Project: COMES, Lines: 7, Source file: data_processor.py

Example 10: dump_svmlight

def dump_svmlight(X_matrix, Y, feature_names, output_filename, feature_id_offset = 0):
  dump_svmlight_file(X_matrix, Y, output_filename)
  contents = None
  with open(output_filename) as output_file:
    contents = '#' + ' '.join(feature_names) + '\n' + ''.join(output_file.readlines())
  with open(output_filename, 'w') as output_file:
    output_file.write(contents)
Author: hczhu, Project: script-tools, Lines: 7, Source file: translate_feature_format.py

Example 11: batch_fit

    def batch_fit(self, Xs, ys, dump=True):
        qids = [np.array([i] * len(ys[i])) for i in range(len(ys))]
        print "dumping data to Xtrain.data"
        if dump:
            dump_svmlight_file(
                np.concatenate(Xs), np.concatenate(ys), "Xtrain.data", zero_based=False, query_id=np.concatenate(qids)
            )

        print "now learning"

        print call(
            [
                self.path + "svm_hmm_learn",
                "-c",
                "%d" % self.C,
                "--t",
                "%d" % self.t,
                "--e",
                "%d" % self.e,
                "Xtrain.data",
                "svmhmm-model.dat",
            ]
        )

        return self
Author: Hanshan1988, Project: smartphone-activity-recognition, Lines: 25, Source file: svmhmm.py

Example 12: load_training_data

def load_training_data(file_location=str, load_from_database=False, limit=int(1000), clean_dataset=True):
    """
    If ```load_from_database``` is True, retrieves and stores data from database to file.

    Arguments:
        file_location (str): Path + filename of libsvm file to save/load (e.g. 'training_data')
        load_from_database (bool): Should data be retrieved from database?
        limit (int): Amount of records to retrieve from database (default=1000)
        clean_dataset (bool): Should questions be cleaned (e.g. remove code samples, hexadecimals, numbers, etc)?

    Returns:
         (pandas.DataFrame.from_csv, sklearn.datasets.load_svmlight_file):
         Tuple containing a pandas.DataFrame (all data retrieved from database) and
         tuple with training data (load_svmlight_file)

    See:
        | ```MySQLDatabase().retrieve_training_data```
        | ```pandas.DataFrame.to_csv```
        | ```pandas.DataFrame.from_csv```
        | ```sklearn.datasets.dump_svmlight_file```
        | ```sklearn.datasets.load_svmlight_file```
    """
    svm_file = file_location + ".dat"
    csv_file = file_location + ".csv"
    if load_from_database:
        comment = u"label: (-1: Bad question, +1: Good question); features: (term_id, frequency)"
        MySQLDatabase().set_vote_value_params()
        data = MySQLDatabase().retrieve_training_data(limit, clean_dataset)
        # create a term-document matrix
        vectorizer = CountVectorizer(analyzer='word', min_df=0.01, stop_words="english")
        td_matrix = vectorizer.fit_transform(data.get(QUESTION_TEXT_KEY))
        data.to_csv(csv_file)
        dump_svmlight_file(td_matrix, data[CLASS_LABEL_KEY], f=svm_file, comment=comment)
    return DataFrame.from_csv(csv_file), load_svmlight_file(svm_file)
Author: klAndersen, Project: IMT4904_MasterThesis_Code, Lines: 34, Source file: test_all_algorithms.py

Example 13: test_load_with_offsets

def test_load_with_offsets(sparsity, n_samples, n_features):
    rng = np.random.RandomState(0)
    X = rng.uniform(low=0.0, high=1.0, size=(n_samples, n_features))
    if sparsity:
        X[X < sparsity] = 0.0
    X = sp.csr_matrix(X)
    y = rng.randint(low=0, high=2, size=n_samples)

    f = BytesIO()
    dump_svmlight_file(X, y, f)
    f.seek(0)

    size = len(f.getvalue())

    # put some marks that are likely to happen anywhere in a row
    mark_0 = 0
    mark_1 = size // 3
    length_0 = mark_1 - mark_0
    mark_2 = 4 * size // 5
    length_1 = mark_2 - mark_1

    # load the original sparse matrix into 3 independent CSR matrices
    X_0, y_0 = load_svmlight_file(f, n_features=n_features,
                                  offset=mark_0, length=length_0)
    X_1, y_1 = load_svmlight_file(f, n_features=n_features,
                                  offset=mark_1, length=length_1)
    X_2, y_2 = load_svmlight_file(f, n_features=n_features,
                                  offset=mark_2)

    y_concat = np.concatenate([y_0, y_1, y_2])
    X_concat = sp.vstack([X_0, X_1, X_2])
    assert_array_almost_equal(y, y_concat)
    assert_array_almost_equal(X.toarray(), X_concat.toarray())
Author: AlexisMignon, Project: scikit-learn, Lines: 33, Source file: test_svmlight_format.py

Example 14: subsample_to_file

def subsample_to_file(svm_file, out_dir, out_name, multilabel=False,
                      row_ratio=0.5, col_ratio=0.3, random_state=12):
  """
  Example:

  '''python
     # run the following command in the current directory will create a
     # `tmp` folder, if not already exists, and generate a file called
     # `a9a_sub` from the original file `./data/a9a`. Both files are
     # in libsvm format.
     subsample_to_file("./data/a9a", "./tmp", "a9a_sub")
     # read the subsampled file and make sure its number of rows is half of
     # that of a9a and its number of columns is roughly a third of that of a9a (123)
     X, y = load_svmlight_file('./tmp/a9a_sub')
     assert X.shape == (16280, 36)
  '''

  """
  assert 1 >= row_ratio > 0, \
         "Row ratio {row_ratio} must be (0, 1]" \
         .format(**locals())
  assert 1 >= col_ratio > 0, \
         "Col ratio {col_ratio} must be (0, 1]" \
         .format(**locals())
  X, y = load_svmlight_file(svm_file, multilabel=multilabel)
  n, m = X.shape
  subn = int(n*row_ratio)
  subm = int(m*col_ratio)
  rst = np.random.RandomState(random_state)
  ridx = rst.choice(n, subn, replace=False)
  cidx = rst.choice(m, subm, replace=False)
  mkdir_p(out_dir)
  out_file = os.path.join(out_dir, out_name)
  dump_svmlight_file(X[ridx,:][:,cidx], y[ridx],
                     out_file, multilabel=multilabel)
Author: mktal, Project: peregrine, Lines: 35, Source file: utils.py

Example 15: save_libfm

def save_libfm(X_sprs_mat, y_array, f):
    
    print("Save LibFM Format")
    
    dump_svmlight_file(X_sprs_mat, y_array, f)
    
    return
Author: nancyya, Project: Predictors, Lines: 7, Source file: dataProcessing.py


Note: The sklearn.datasets.dump_svmlight_file examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by their respective developers; copyright of the source code remains with the original authors. For distribution and use, please refer to the corresponding project's License. Do not reproduce without permission.