当前位置: 首页>>代码示例>>Python>>正文


Python preprocessing.normalize方法代码示例

本文整理汇总了Python中sklearn.preprocessing.normalize方法的典型用法代码示例。如果您正苦于以下问题:Python preprocessing.normalize方法的具体用法?Python preprocessing.normalize怎么用?Python preprocessing.normalize使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.preprocessing的用法示例。


在下文中一共展示了preprocessing.normalize方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: load_names

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def load_names(data_names, norm=True, log1p=False, verbose=True):
    """Load each named dataset and collect the results as CSR matrices.

    Args:
        data_names: iterable of names; each one is handed to ``load_data``.
        norm: if true, pass each matrix through ``normalize(..., axis=1)``.
        log1p: if true, apply ``np.log1p`` (after the optional normalization).
        verbose: if true, print one line per dataset and a final total.

    Returns:
        ``(datasets, genes_list, n_cells)`` -- the CSR matrices, the second
        value returned by ``load_data`` for each name (same order), and the
        sum of the matrices' row counts.
    """
    matrices, gene_sets = [], []
    total_cells = 0

    for dataset_name in data_names:
        expr, genes = load_data(dataset_name)
        if norm:
            expr = normalize(expr, axis=1)
        if log1p:
            expr = np.log1p(expr)
        expr = csr_matrix(expr)

        matrices.append(expr)
        gene_sets.append(genes)

        # Rows are reported as cells and columns as genes.
        row_count, col_count = expr.shape[0], expr.shape[1]
        total_cells += row_count
        if verbose:
            print('Loaded {} with {} genes and {} cells'.
                  format(dataset_name, col_count, row_count))

    if verbose:
        print('Found {} cells among all datasets'
              .format(total_cells))

    return matrices, gene_sets, total_cells
开发者ID:brianhie,项目名称:scanorama,代码行数:26,代码来源:process.py

示例2: main

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def main():
    """Fit MLELM and ELM on the 'diabetes' dataset and print test accuracy.

    The data is fetched with ``fetch_openml``, passed through
    ``preprocessing.normalize`` with its default arguments, and split
    60/40 into train and test sets. Labels equal to "tested_positive"
    become 1, every other label becomes -1.
    """
    from sklearn import preprocessing
    from sklearn.datasets import fetch_openml as fetch_mldata
    from sklearn.model_selection import train_test_split

    db_name = 'diabetes'
    bunch = fetch_mldata(db_name)
    bunch.data = preprocessing.normalize(bunch.data)

    # Recode the string labels as +1 / -1.
    bunch.target = [1 if label == "tested_positive" else -1
                    for label in bunch.target]

    X_train, X_test, y_train, y_test = train_test_split(
        bunch.data, bunch.target, test_size=0.4)

    # Both models are fitted before either score is reported.
    multilayer_model = MLELM(hidden_units=(10, 30, 200)).fit(X_train, y_train)
    single_model = ELM(200).fit(X_train, y_train)

    print("MLELM Accuracy %0.3f " % multilayer_model.score(X_test, y_test))
    print("ELM Accuracy %0.3f " % single_model.score(X_test, y_test))
开发者ID:masaponto,项目名称:Python-ELM,代码行数:23,代码来源:ml_elm.py

示例3: train

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def train(self):
        """Fill ``self.RepMat`` step by step and build ``self.vectors``.

        For each of the ``self.Kstep`` steps the running matrix ``self.Ak``
        is multiplied by the adjacency matrix, converted with
        ``GetProbTranMat``, reduced with ``GetRepUseSVD(..., 0.5)``,
        L2-normalized per row, and written into the next ``self.dim``-wide
        column band of ``self.RepMat``. Finally every row of ``self.RepMat``
        is stored in ``self.vectors`` under the matching entry of
        ``self.g.look_back_list``.
        """
        self.adj = self.getAdjMat()
        self.node_size = self.adj.shape[0]
        self.Ak = np.matrix(np.identity(self.node_size))
        self.RepMat = np.zeros((self.node_size, int(self.dim*self.Kstep)))

        for step in range(self.Kstep):
            print('Kstep =', step)
            self.Ak = np.dot(self.Ak, self.adj)
            transition = self.GetProbTranMat(self.Ak)
            step_repr = normalize(self.GetRepUseSVD(transition, 0.5),
                                  axis=1, norm='l2')
            band_start = self.dim * step
            band_stop = self.dim * (step + 1)
            self.RepMat[:, band_start:band_stop] = step_repr[:, :]

        # Map every row of the representation matrix to its node key.
        self.vectors = {}
        node_keys = self.g.look_back_list
        for row_index, row in enumerate(self.RepMat):
            self.vectors[node_keys[row_index]] = row
开发者ID:thunlp,项目名称:OpenNE,代码行数:19,代码来源:grarep.py

示例4: pre_factorization

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def pre_factorization(G, n_components, exponent):
        """Build the log-ratio matrix from ``G`` and factorize it.

        Args:
            G: input matrix; it is L1-normalized with
                ``preprocessing.normalize`` and also multiplied by the
                negative-sampling diagonal.
            n_components: forwarded to ``ProNE.tsvd_rand``.
            exponent: power applied to the column sums of the normalized
                matrix before they are rescaled to sum to one.

        Returns:
            Whatever ``ProNE.tsvd_rand`` returns for the log-ratio matrix.
        """
        normalized = preprocessing.normalize(G, "l1")

        # Negative-sample term: powered column sums, rescaled to sum to one,
        # placed on a diagonal and applied to G.
        col_weights = np.array(normalized.sum(axis=0))[0] ** exponent
        col_weights = col_weights / col_weights.sum()
        neg = G.dot(sparse.diags(col_weights, format="csr"))

        # Stored values <= 0 are replaced by 1 so that their log becomes 0.
        normalized.data[normalized.data <= 0] = 1
        neg.data[neg.data <= 0] = 1
        normalized.data = np.log(normalized.data)
        neg.data = np.log(neg.data)

        normalized -= neg
        return ProNE.tsvd_rand(normalized, n_components=n_components)
开发者ID:VHRanger,项目名称:nodevectors,代码行数:20,代码来源:prone.py

示例5: parse

# 需要导入模块: import argparse
# 注意: 本例只定义了名为 --normalize 的命令行选项, 并未调用 sklearn.preprocessing.normalize
def parse():
  """Declare the script's command-line options and return the parsed result.

  Options: positional ``dataset``; ``-n/--n`` and ``--min_count`` (ints,
  default 1); ``--embedding`` (defaults to ``CCGLOVE``); ``--weights``
  (default ``None``); and the boolean flags ``-norm/--normalize``,
  ``-l/--lower`` and ``-e/--embed``.
  """
  cli = argparse.ArgumentParser()
  add = cli.add_argument

  add('dataset', type=str, help='pol or main')
  add('-n', '--n', type=int, default=1, help='Number of grams')
  add('--min_count', type=int, default=1, help='Min count')
  add('--embedding', type=str, default=CCGLOVE, help='embedding file')
  add('--weights', type=str, default=None,
      help='weights to use for ngrams (e.g. sif, None)')

  # Boolean switches, all off unless given.
  add('-norm', '--normalize', action='store_true', help='Normalize vectors')
  add('-l', '--lower', action='store_true',
      help='Whether or not to lowercase text')
  add('-e', '--embed', action='store_true',
      help='Use embeddings instead of bong')

  return cli.parse_args()
开发者ID:NLPrinceton,项目名称:SARC,代码行数:18,代码来源:eval.py

示例6: strip_accents_unicode

# 需要导入模块: import unicodedata
# 注意: 本例调用的是标准库 unicodedata.normalize, 而非 sklearn.preprocessing.normalize
def strip_accents_unicode(s):
    """Transform accentuated unicode symbols into their simple counterpart.

    The string is decomposed with NFKD normalization and every combining
    character is dropped. Combining characters are removed even when the
    input is already in NFKD form (for example ``"e\\u0301"``); an earlier
    shortcut that returned such input unchanged left its accents in place.

    Warning: the python-level loop and join operations make this
    implementation slower than the strip_accents_ascii basic normalization.

    Parameters
    ----------
    s : str
        The text to strip.

    Returns
    -------
    str
        ``s`` itself when it is pure ASCII, otherwise a new string without
        combining characters.

    See also
    --------
    strip_accents_ascii
        Remove accentuated char for any unicode symbol that has a direct
        ASCII equivalent.
    """
    try:
        # Pure-ASCII text cannot carry accents: return it untouched and skip
        # the per-character pass.
        s.encode('ascii')
    except UnicodeEncodeError:
        normalized = unicodedata.normalize('NFKD', s)
        return ''.join(c for c in normalized if not unicodedata.combining(c))
    return s
开发者ID:prozhuchen,项目名称:2016CCF-sougou,代码行数:20,代码来源:STFIWF.py

示例7: _char_wb_ngrams

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def _char_wb_ngrams(self, text_document):
        """Whitespace sensitive char-n-gram tokenization.

        Tokenize text_document into a sequence of character n-grams
        operating only inside word boundaries: every match of
        ``self._white_spaces`` is replaced by a single space, the text is
        split on whitespace, and each word is padded with one space on both
        sides before n-grams are taken.

        Args:
            text_document: the text to tokenize.

        Returns:
            A list of n-gram strings, for every size ``n`` from
            ``self.ngram_range[0]`` to ``self.ngram_range[1]`` inclusive.
            A padded word that is not longer than ``n`` is emitted once as
            a whole and larger sizes are skipped for that word.
        """
        # normalize white spaces
        text_document = self._white_spaces.sub(" ", text_document)

        min_n, max_n = self.ngram_range
        ngrams = []
        for word in text_document.split():
            padded = ' ' + word + ' '
            padded_len = len(padded)
            # ``range`` instead of ``xrange``: the latter does not exist on
            # Python 3, and the span of sizes is small either way.
            for n in range(min_n, max_n + 1):
                # Offsets 0..last_offset each start one n-gram; offset 0 is
                # always emitted, even when the padded word is shorter than n.
                last_offset = max(padded_len - n, 0)
                ngrams.extend(padded[offset:offset + n]
                              for offset in range(last_offset + 1))
                if last_offset == 0:  # count a short word only once
                    break
        return ngrams
开发者ID:prozhuchen,项目名称:2016CCF-sougou,代码行数:24,代码来源:STFIWF.py

示例8: __init__

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def __init__(self, word_vec_list, args, input_dimension=1500, hidden_dimensions=None):
        """Set up the encoder state, build its graph and initialize variables.

        Args:
            word_vec_list: vectors that are reshaped to
                ``(len(word_vec_list), input_dimension)``.
            args: settings object; ``args.dim`` and ``args.encoder_normalize``
                are read here.
            input_dimension: width of each input vector.
            hidden_dimensions: layer sizes; defaults to
                ``[1024, 512, args.dim]`` when ``None``.
        """
        self.session = load_session()
        self.args = args
        self.weights = {}
        self.biases = {}
        self.input_dimension = input_dimension

        # Fall back to the default layer sizes when none were supplied.
        self.hidden_dimensions = (
            [1024, 512, self.args.dim] if hidden_dimensions is None
            else hidden_dimensions)
        self.layer_num = len(self.hidden_dimensions)

        # Placeholders, cleared here before the graph is built.
        self.encoder_output = None
        self.decoder_output = None
        self.decoder_op = None

        vectors = np.reshape(word_vec_list, [len(word_vec_list), input_dimension])
        if self.args.encoder_normalize:
            vectors = preprocessing.normalize(vectors)
        self.word_vec_list = vectors

        self._init_graph()
        self._loss_optimizer()
        tf.global_variables_initializer().run(session=self.session)
开发者ID:nju-websoft,项目名称:MultiKE,代码行数:22,代码来源:literal_encoder.py

示例9: _generate_name_vectors_mat

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def _generate_name_vectors_mat(self):
        """Assemble ``self.local_name_vectors``, one row per entity id.

        Entity ids ``0 .. len(self.entities) - 1`` are resolved to URIs using
        the ``entities_id_dict`` of both knowledge graphs, then to local
        names, then to literal indices; those indices select rows of
        ``self.literal_vectors_mat``. The rows are passed through
        ``preprocessing.normalize`` when ``self.args.literal_normalize`` is
        set. Progress information is printed along the way.
        """
        num = len(self.entities)
        print("total entities:", num)

        # Invert uri -> id for both graphs; kg2 entries overwrite kg1 on clashes.
        id_to_uri = {ent_id: uri
                     for uri, ent_id in self.kgs.kg1.entities_id_dict.items()}
        id_to_uri.update({ent_id: uri
                          for uri, ent_id in self.kgs.kg2.entities_id_dict.items()})
        print('total entities ids:', len(id_to_uri))
        assert len(id_to_uri) == num

        literal_rows = []
        for ent_id in range(num):
            assert ent_id in id_to_uri
            uri = id_to_uri.get(ent_id)
            assert uri in self.entity_local_name_dict
            local_name = self.entity_local_name_dict.get(uri)
            literal_rows.append(self.literal_id_dic.get(local_name))
        print('name_ordered_list', len(literal_rows))

        name_mat = self.literal_vectors_mat[literal_rows, ]
        print("entity name embeddings mat:", type(name_mat), name_mat.shape)
        if self.args.literal_normalize:
            name_mat = preprocessing.normalize(name_mat)
        self.local_name_vectors = name_mat
开发者ID:nju-websoft,项目名称:MultiKE,代码行数:24,代码来源:data_model.py

示例10: valid

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def valid(model, embed_choice='avg', w=(1, 1, 1)):
    """Evaluate the chosen entity embeddings on the validation entities.

    Args:
        model: object providing the embedding tensors, ``session``, ``kgs``
            and ``args`` read below.
        embed_choice: 'nv', 'rv', 'av' or 'final' to evaluate a single
            embedding tensor, 'avg' for the ``w``-weighted sum of the
            'nv', 'rv' and 'av' tensors. Any other value uses
            ``model.ent_embeds`` as-is, without evaluating it.
        w: three weights used only by 'avg'.

    Returns:
        The second value returned by ``eva.valid``.
    """
    def run(tensor):
        return tensor.eval(session=model.session)

    if embed_choice == 'nv':
        ent_embeds = run(model.name_embeds)
    elif embed_choice == 'rv':
        ent_embeds = run(model.rv_ent_embeds)
    elif embed_choice == 'av':
        ent_embeds = run(model.av_ent_embeds)
    elif embed_choice == 'final':
        ent_embeds = run(model.ent_embeds)
    elif embed_choice == 'avg':
        part_nv = w[0] * run(model.name_embeds)
        part_rv = w[1] * run(model.rv_ent_embeds)
        part_av = w[2] * run(model.av_ent_embeds)
        ent_embeds = part_nv + part_rv + part_av
    else:
        # Unrecognized choice: take the attribute without evaluating it.
        ent_embeds = model.ent_embeds

    print(embed_choice, 'valid results:')
    kgs = model.kgs
    embeds1 = ent_embeds[kgs.valid_entities1,]
    # Candidates are the validation entities plus the test entities of side 2.
    embeds2 = ent_embeds[kgs.valid_entities2 + kgs.test_entities2,]
    _hits, mrr = eva.valid(embeds1, embeds2, None, model.args.top_k,
                           model.args.test_threads_num, normalize=True)
    # Drop the slices and collect right away to release memory.
    del embeds1, embeds2
    gc.collect()
    return mrr
开发者ID:nju-websoft,项目名称:MultiKE,代码行数:25,代码来源:MultiKE_Late.py

示例11: test

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def test(model, embed_choice='avg', w=(1, 1, 1)):
    """Evaluate the chosen entity embeddings on the test entities.

    Args:
        model: object providing the embedding tensors, ``session``, ``kgs``
            and ``args`` read below.
        embed_choice: 'nv', 'rv', 'av' or 'final' to evaluate a single
            embedding tensor, 'avg' for the ``w``-weighted sum of the
            'nv', 'rv' and 'av' tensors. Any other value uses
            ``model.ent_embeds`` as-is, without evaluating it.
        w: three weights used only by 'avg'.

    Returns:
        The second value returned by ``eva.valid``.
    """
    def run(tensor):
        return tensor.eval(session=model.session)

    if embed_choice == 'nv':
        ent_embeds = run(model.name_embeds)
    elif embed_choice == 'rv':
        ent_embeds = run(model.rv_ent_embeds)
    elif embed_choice == 'av':
        ent_embeds = run(model.av_ent_embeds)
    elif embed_choice == 'final':
        ent_embeds = run(model.ent_embeds)
    elif embed_choice == 'avg':
        part_nv = w[0] * run(model.name_embeds)
        part_rv = w[1] * run(model.rv_ent_embeds)
        part_av = w[2] * run(model.av_ent_embeds)
        ent_embeds = part_nv + part_rv + part_av
    else:
        # Unrecognized choice: take the attribute without evaluating it.
        ent_embeds = model.ent_embeds

    print(embed_choice, 'test results:')
    kgs = model.kgs
    embeds1 = ent_embeds[kgs.test_entities1,]
    embeds2 = ent_embeds[kgs.test_entities2,]
    _hits, mrr = eva.valid(embeds1, embeds2, None, model.args.top_k,
                           model.args.test_threads_num, normalize=True)
    # Drop the slices and collect right away to release memory.
    del embeds1, embeds2
    gc.collect()
    return mrr
开发者ID:nju-websoft,项目名称:MultiKE,代码行数:25,代码来源:MultiKE_Late.py

示例12: _compute_weight

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def _compute_weight(embeds1, embeds2, embeds3):
    """Score how well ``embeds1`` agrees with the average of all three inputs.

    The three matrices are averaged element-wise; the average and
    ``embeds1`` are each passed through ``preprocessing.normalize``; the
    weight of a row is the dot product between that row of ``embeds1`` and
    the same row of the average. The shape of the weight vector and its
    mean are printed.

    Args:
        embeds1: matrix whose agreement is being scored.
        embeds2: second matrix, same shape as ``embeds1``.
        embeds3: third matrix, same shape as ``embeds1``.

    Returns:
        The mean of the per-row weights.
    """
    other_embeds = (embeds1 + embeds2 + embeds3) / 3
    other_embeds = preprocessing.normalize(other_embeds)
    embeds1 = preprocessing.normalize(embeds1)
    # Only matching rows are compared, so take the row-wise dot product
    # directly rather than building the full N x N similarity matrix and
    # reading its diagonal.
    weights = np.einsum('ij,ij->i', embeds1, other_embeds)
    mean_weight = np.mean(weights)
    print(weights.shape, mean_weight)
    return mean_weight
开发者ID:nju-websoft,项目名称:MultiKE,代码行数:20,代码来源:MultiKE_Late.py

示例13: _predict_proba

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def _predict_proba(self, X):
        """Sum the (optionally accuracy-weighted) votes of all ensemble members.

        Each member's ``predict_proba(X)`` is skipped when it sums to
        nothing positive; otherwise its first row after L1 normalization is
        used. Unless ``self.disable_weighted_vote`` is set, a vote is scaled
        by the member's accuracy when that accuracy is positive. Vote
        vectors of different lengths are zero-extended to the longer length
        before being added.

        Returns:
            A 1-D array with the accumulated votes (``[0.]`` if no member
            contributed).
        """
        votes = np.asarray([0.])

        for learner in self.ensemble:
            member_proba = learner.predict_proba(X)
            if not np.sum(member_proba) > 0.0:
                continue

            member_proba = normalize(member_proba, norm='l1')[0].copy()
            acc = learner.performance_evaluator.accuracy_score()
            if not self.disable_weighted_vote and acc > 0.0:
                member_proba *= acc

            # Grow whichever vector is shorter so both have the same length.
            if len(member_proba) > len(votes):
                votes.resize((len(member_proba), ), refcheck=False)
            elif len(member_proba) < len(votes):
                member_proba.resize((len(votes), ), refcheck=False)

            votes += member_proba
        return votes
开发者ID:scikit-multiflow,项目名称:scikit-multiflow,代码行数:21,代码来源:streaming_random_patches.py

示例14: _update_embedding

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def _update_embedding(self, graph, original_embedding):
        r"""Refine an embedding using one- and two-step neighbourhood terms.

        Args:
            graph (NetworkX graph): The embedded graph; its nodes are taken
                in the order ``0 .. number_of_nodes() - 1``.
            original_embedding (Numpy array): An array containing an embedding.

        Return types:
            embedding (Numpy array): The embedding after ``self.iterations``
            rounds of adding ``self.L1`` times the one-step term and
            ``self.L2`` times the two-step term.
        """
        embedding = self._normalize_embedding(original_embedding)
        node_order = range(graph.number_of_nodes())
        # Row-wise L1 normalization of the adjacency matrix.
        transition = normalize(nx.adjacency_matrix(graph, nodelist=node_order),
                               norm='l1', axis=1)
        for _ in range(self.iterations):
            one_step = transition @ embedding
            two_step = transition @ one_step
            embedding = embedding + self.L1*one_step + self.L2*two_step
        return embedding
开发者ID:benedekrozemberczki,项目名称:karateclub,代码行数:19,代码来源:neu.py

示例15: transform

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def transform(self, X_si, high=None, low=None, limit=None):
        """Hash the documents in ``X_si`` into a matrix of binary features.

        Each document goes through ``self._deal_with_input`` and the
        analyzer before being hashed. All stored values of the hashed
        matrix are then set to 1, the matrix is normalized when
        ``self.norm`` is not ``None``, and ``self._limit_features`` is
        applied when ``low`` is truthy.

        Args:
            X_si: iterable of documents.
            high: not used by this method.
            low: forwarded to ``self._limit_features`` when truthy.
            limit: not used by this method.

        Returns:
            The resulting feature matrix.
        """
        analyzer = self.build_analyzer()
        hasher = self._get_hasher()

        X = hasher.transform(
            analyzer(self._deal_with_input(document)) for document in X_si)

        # Keep presence only: every stored entry becomes 1.
        X.data.fill(1)

        if self.norm is not None:
            X = normalize(X, norm=self.norm, copy=False)

        if low:
            X = self._limit_features(X, low=low)
        return X
开发者ID:ijmarshall,项目名称:robotreviewer,代码行数:22,代码来源:vectorizer.py


注:本文中的sklearn.preprocessing.normalize方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。