本文整理汇总了Python中sklearn.preprocessing.normalize方法的典型用法代码示例。如果您正苦于以下问题：Python preprocessing.normalize方法的具体用法？Python preprocessing.normalize怎么用？Python preprocessing.normalize使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.preprocessing的用法示例。
在下文中一共展示了preprocessing.normalize方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: load_names
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def load_names(data_names, norm=True, log1p=False, verbose=True):
    """Load every named dataset and collect the results as CSR matrices.

    Each entry of ``data_names`` is handed to ``load_data``, which returns
    a matrix together with its gene list. The matrix is optionally passed
    through ``normalize(..., axis=1)`` (``norm``), optionally transformed
    with ``np.log1p`` (``log1p``), and finally wrapped in ``csr_matrix``.

    Returns:
        A ``(datasets, genes_list, n_cells)`` tuple: the list of CSR
        matrices, the list of per-dataset gene lists, and the summed
        row count (``shape[0]``) over all matrices.
    """
    datasets, genes_list = [], []
    total_cells = 0

    for dataset_name in data_names:
        matrix, genes = load_data(dataset_name)

        if norm:
            matrix = normalize(matrix, axis=1)
        if log1p:
            matrix = np.log1p(matrix)
        matrix = csr_matrix(matrix)

        datasets.append(matrix)
        genes_list.append(genes)

        cell_count = matrix.shape[0]
        total_cells += cell_count

        if verbose:
            message = 'Loaded {} with {} genes and {} cells'
            print(message.format(dataset_name, matrix.shape[1], cell_count))

    if verbose:
        print('Found {} cells among all datasets'.format(total_cells))

    return datasets, genes_list, total_cells
示例2: main
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def main():
    """Fit MLELM and ELM on the 'diabetes' dataset and print test accuracy.

    The data is fetched through ``fetch_openml``, passed through
    ``preprocessing.normalize``, and the string labels are mapped to
    ``1`` ("tested_positive") or ``-1`` (anything else). 40% of the
    samples are held out for scoring.
    """
    from sklearn import preprocessing
    from sklearn.datasets import fetch_openml as fetch_mldata
    from sklearn.model_selection import train_test_split

    db_name = 'diabetes'
    data_set = fetch_mldata(db_name)
    data_set.data = preprocessing.normalize(data_set.data)

    # Encode the string class labels as +1 / -1.
    encoded_labels = []
    for label in data_set.target:
        encoded_labels.append(1 if label == "tested_positive" else -1)
    data_set.target = encoded_labels

    split = train_test_split(data_set.data, data_set.target, test_size=0.4)
    X_train, X_test, y_train, y_test = split

    mlelm = MLELM(hidden_units=(10, 30, 200)).fit(X_train, y_train)
    elm = ELM(200).fit(X_train, y_train)

    mlelm_accuracy = mlelm.score(X_test, y_test)
    print("MLELM Accuracy %0.3f " % mlelm_accuracy)
    elm_accuracy = elm.score(X_test, y_test)
    print("ELM Accuracy %0.3f " % elm_accuracy)
示例3: train
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def train(self):
    """Build ``self.RepMat`` from ``Kstep`` matrix powers and fill ``self.vectors``.

    For every step the running product ``self.Ak`` is multiplied by the
    adjacency matrix, turned into a matrix by ``GetProbTranMat``, reduced
    by ``GetRepUseSVD`` and L2-normalised per row. Each step's result
    occupies its own ``self.dim``-wide column slice of ``self.RepMat``.
    Finally each row of ``self.RepMat`` is stored in ``self.vectors``
    under the key ``self.g.look_back_list[row_index]``.
    """
    self.adj = self.getAdjMat()
    self.node_size = self.adj.shape[0]
    self.Ak = np.matrix(np.identity(self.node_size))

    total_width = int(self.dim*self.Kstep)
    self.RepMat = np.zeros((self.node_size, total_width))

    for step in range(self.Kstep):
        print('Kstep =', step)
        self.Ak = np.dot(self.Ak, self.adj)
        transition = self.GetProbTranMat(self.Ak)
        step_rep = normalize(self.GetRepUseSVD(transition, 0.5),
                             axis=1, norm='l2')
        # Columns [dim*step, dim*(step+1)) belong to this step.
        first_col = self.dim*step
        last_col = self.dim*(step+1)
        self.RepMat[:, first_col:last_col] = step_rep[:, :]

    # get embeddings
    self.vectors = {}
    node_names = self.g.look_back_list
    for row_index, row in enumerate(self.RepMat):
        self.vectors[node_names[row_index]] = row
示例4: pre_factorization
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def pre_factorization(G, n_components, exponent):
    """
    Network Embedding as Sparse Matrix Factorization

    L1-normalises ``G``, builds a second sparse matrix by scaling the
    columns of ``G`` with the normalised column sums of the first matrix
    raised to ``exponent``, takes the element-wise log of the stored
    entries of both, and factorises their difference with
    ``ProNE.tsvd_rand``.

    Returns whatever ``ProNE.tsvd_rand`` returns for ``n_components``.
    """
    positive = preprocessing.normalize(G, "l1")

    # Prepare negative samples
    column_weight = np.array(positive.sum(axis=0))[0] ** exponent
    column_weight = column_weight / column_weight.sum()
    negative = G.dot(sparse.diags(column_weight, format="csr"))

    # Stored entries <= 0 are set to 1 so that they become 0 under the log.
    for matrix in (positive, negative):
        matrix.data[matrix.data <= 0] = 1
    for matrix in (positive, negative):
        matrix.data = np.log(matrix.data)

    positive -= negative
    return ProNE.tsvd_rand(positive, n_components=n_components)
示例5: parse
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def parse():
    """Define the command-line interface and parse the process arguments.

    Options: positional ``dataset``; ``-n/--n`` and ``--min_count``
    (ints, default 1); ``--embedding`` (defaults to ``CCGLOVE``);
    ``--weights`` (default ``None``); and the boolean switches
    ``-norm/--normalize``, ``-l/--lower`` and ``-e/--embed``.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    # Valued arguments.
    add('dataset', type=str, help='pol or main')
    add('-n', '--n', type=int, default=1, help='Number of grams')
    add('--min_count', type=int, default=1, help='Min count')
    add('--embedding', type=str, default=CCGLOVE, help='embedding file')
    add('--weights', type=str, default=None,
        help='weights to use for ngrams (e.g. sif, None)')

    # Boolean switches.
    add('-norm', '--normalize', action='store_true',
        help='Normalize vectors')
    add('-l', '--lower', action='store_true',
        help='Whether or not to lowercase text')
    add('-e', '--embed', action='store_true',
        help='Use embeddings instead of bong')

    args = parser.parse_args()
    return args
示例6: strip_accents_unicode
# 需要导入模块: import unicodedata
# 注意: 本示例调用的是标准库的 unicodedata.normalize, 而不是 sklearn.preprocessing.normalize
def strip_accents_unicode(s):
    """Transform accentuated unicode symbols into their simple counterpart.

    Warning: the python-level loop and join operations make this
    implementation slower than the strip_accents_ascii basic
    normalization.

    Parameters
    ----------
    s : str
        The string to strip.

    Returns
    -------
    str
        ``s`` itself when it is pure ASCII, otherwise the NFKD
        decomposition of ``s`` with every combining character removed.

    See also
    --------
    strip_accents_ascii
        Remove accentuated char for any unicode symbol that has a direct
        ASCII equivalent.
    """
    try:
        # Pure-ASCII text cannot contain accents, so the per-character
        # pass can be skipped entirely.
        s.encode("ASCII")
    except UnicodeEncodeError:
        # Do NOT short-circuit on ``normalized == s``: a string that is
        # already in NFKD form (e.g. "n" + U+0303) compares equal to its
        # normalisation but still carries combining marks to strip.
        normalized = unicodedata.normalize('NFKD', s)
        return ''.join([c for c in normalized if not unicodedata.combining(c)])
    return s
示例7: _char_wb_ngrams
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def _char_wb_ngrams(self, text_document):
"""Whitespace sensitive char-n-gram tokenization.
Tokenize text_document into a sequence of character n-grams
excluding any whitespace (operating only inside word boundaries)"""
# normalize white spaces
text_document = self._white_spaces.sub(" ", text_document)
min_n, max_n = self.ngram_range
ngrams = []
for w in text_document.split():
w = ' ' + w + ' '
w_len = len(w)
for n in xrange(min_n, max_n + 1):
offset = 0
ngrams.append(w[offset:offset + n])
while offset + n < w_len:
offset += 1
ngrams.append(w[offset:offset + n])
if offset == 0: # count a short word (w_len < n) only once
break
return ngrams
示例8: __init__
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def __init__(self, word_vec_list, args, input_dimension=1500, hidden_dimensions=None):
    """Store the configuration, prepare the input vectors and build the graph.

    Args:
        word_vec_list: Input vectors; reshaped to
            ``(len(word_vec_list), input_dimension)``.
        args: Configuration object; ``args.dim`` and
            ``args.encoder_normalize`` are read here.
        input_dimension: Width of each input vector.
        hidden_dimensions: Layer sizes; defaults to
            ``[1024, 512, args.dim]`` when ``None``.
    """
    self.session = load_session()
    self.args = args
    self.weights = {}
    self.biases = {}
    self.input_dimension = input_dimension

    self.hidden_dimensions = (
        [1024, 512, self.args.dim] if hidden_dimensions is None
        else hidden_dimensions
    )
    self.layer_num = len(self.hidden_dimensions)

    # Placeholders; nothing in this method assigns them a real value.
    self.encoder_output = self.decoder_output = self.decoder_op = None

    target_shape = [len(word_vec_list), input_dimension]
    self.word_vec_list = np.reshape(word_vec_list, target_shape)
    if self.args.encoder_normalize:
        self.word_vec_list = preprocessing.normalize(self.word_vec_list)

    self._init_graph()
    self._loss_optimizer()
    initializer = tf.global_variables_initializer()
    initializer.run(session=self.session)
示例9: _generate_name_vectors_mat
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def _generate_name_vectors_mat(self):
    """Assemble ``self.local_name_vectors``, one literal vector per entity id.

    Entity ids ``0 .. len(self.entities) - 1`` are resolved through the
    inverted ``entities_id_dict`` of both KGs, then through
    ``self.entity_local_name_dict`` and ``self.literal_id_dic`` to a row
    index of ``self.literal_vectors_mat``. The selected rows are
    normalised when ``self.args.literal_normalize`` is set.
    """
    num = len(self.entities)
    print("total entities:", num)

    # Invert (value -> key) the id dictionaries of both KGs into a single
    # lookup; entries from kg2 overwrite entries from kg1.
    id_to_uri = {}
    for kg in (self.kgs.kg1, self.kgs.kg2):
        id_to_uri.update(zip(kg.entities_id_dict.values(),
                             kg.entities_id_dict.keys()))
    print('total entities ids:', len(id_to_uri))
    assert len(id_to_uri) == num

    name_indices = []
    for entity_id in range(num):
        assert entity_id in id_to_uri
        uri = id_to_uri.get(entity_id)
        assert uri in self.entity_local_name_dict
        local_name = self.entity_local_name_dict.get(uri)
        name_indices.append(self.literal_id_dic.get(local_name))
    print('name_ordered_list', len(name_indices))

    name_mat = self.literal_vectors_mat[name_indices, ]
    print("entity name embeddings mat:", type(name_mat), name_mat.shape)
    if self.args.literal_normalize:
        name_mat = preprocessing.normalize(name_mat)
    self.local_name_vectors = name_mat
示例10: valid
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def valid(model, embed_choice='avg', w=(1, 1, 1)):
    """Score the chosen entity embeddings on the validation entities.

    Args:
        model: Object exposing the embedding tensors, ``session``,
            ``kgs`` and ``args``.
        embed_choice: ``'nv'``, ``'rv'``, ``'av'`` or ``'final'`` pick
            ``name_embeds``, ``rv_ent_embeds``, ``av_ent_embeds`` or
            ``ent_embeds``; ``'avg'`` is the ``w``-weighted sum of the
            first three.
        w: The three weights used by ``'avg'``.

    Returns:
        The second value returned by ``eva.valid`` (named ``mrr_12``
        in the original code).
    """
    def evaluate(tensor):
        return tensor.eval(session=model.session)

    if embed_choice == 'nv':
        ent_embeds = evaluate(model.name_embeds)
    elif embed_choice == 'rv':
        ent_embeds = evaluate(model.rv_ent_embeds)
    elif embed_choice == 'av':
        ent_embeds = evaluate(model.av_ent_embeds)
    elif embed_choice == 'final':
        ent_embeds = evaluate(model.ent_embeds)
    elif embed_choice == 'avg':
        name_part = w[0] * evaluate(model.name_embeds)
        rv_part = w[1] * evaluate(model.rv_ent_embeds)
        av_part = w[2] * evaluate(model.av_ent_embeds)
        ent_embeds = name_part + rv_part + av_part
    else:
        # NOTE(review): unlike every branch above, this fallback is not
        # evaluated in the session - confirm that is intended.
        ent_embeds = model.ent_embeds
    print(embed_choice, 'valid results:')

    kgs = model.kgs
    embeds1 = ent_embeds[kgs.valid_entities1,]
    # Candidates are the validation AND the test entities of the second KG.
    embeds2 = ent_embeds[kgs.valid_entities2 + kgs.test_entities2,]
    _, mrr_12 = eva.valid(embeds1, embeds2, None, model.args.top_k,
                          model.args.test_threads_num, normalize=True)

    del embeds1, embeds2
    gc.collect()
    return mrr_12
示例11: test
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def test(model, embed_choice='avg', w=(1, 1, 1)):
    """Score the chosen entity embeddings on the test entities.

    Args:
        model: Object exposing the embedding tensors, ``session``,
            ``kgs`` and ``args``.
        embed_choice: ``'nv'``, ``'rv'``, ``'av'`` or ``'final'`` pick
            ``name_embeds``, ``rv_ent_embeds``, ``av_ent_embeds`` or
            ``ent_embeds``; ``'avg'`` is the ``w``-weighted sum of the
            first three.
        w: The three weights used by ``'avg'``.

    Returns:
        The second value returned by ``eva.valid`` (named ``mrr_12``
        in the original code).
    """
    def evaluate(tensor):
        return tensor.eval(session=model.session)

    if embed_choice == 'nv':
        ent_embeds = evaluate(model.name_embeds)
    elif embed_choice == 'rv':
        ent_embeds = evaluate(model.rv_ent_embeds)
    elif embed_choice == 'av':
        ent_embeds = evaluate(model.av_ent_embeds)
    elif embed_choice == 'final':
        ent_embeds = evaluate(model.ent_embeds)
    elif embed_choice == 'avg':
        name_part = w[0] * evaluate(model.name_embeds)
        rv_part = w[1] * evaluate(model.rv_ent_embeds)
        av_part = w[2] * evaluate(model.av_ent_embeds)
        ent_embeds = name_part + rv_part + av_part
    else:
        # NOTE(review): unlike every branch above, this fallback is not
        # evaluated in the session - confirm that is intended.
        ent_embeds = model.ent_embeds
    print(embed_choice, 'test results:')

    kgs = model.kgs
    embeds1 = ent_embeds[kgs.test_entities1,]
    embeds2 = ent_embeds[kgs.test_entities2,]
    _, mrr_12 = eva.valid(embeds1, embeds2, None, model.args.top_k,
                          model.args.test_threads_num, normalize=True)

    del embeds1, embeds2
    gc.collect()
    return mrr_12
示例12: _compute_weight
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def _compute_weight(embeds1, embeds2, embeds3):
    """Return the mean row-wise similarity of ``embeds1`` to the 3-way average.

    The element-wise mean of the three inputs and ``embeds1`` itself are
    both passed through ``preprocessing.normalize``; row ``i`` of one is
    then dotted with row ``i`` of the other, and the mean of those dot
    products is printed and returned.

    Args:
        embeds1: Matrix whose rows are compared against the average.
        embeds2: Second matrix, same shape as ``embeds1``.
        embeds3: Third matrix, same shape as ``embeds1``.

    Returns:
        The mean of the per-row dot products (a NumPy scalar).
    """
    other_embeds = preprocessing.normalize((embeds1 + embeds2 + embeds3) / 3)
    embeds1 = preprocessing.normalize(embeds1)

    # Only the diagonal of ``embeds1 @ other_embeds.T`` is needed, i.e. the
    # dot product of matching rows. Computing just those avoids
    # materialising the full n x n similarity matrix.
    weights = np.einsum('ij,ij->i', embeds1, other_embeds)

    mean_weight = np.mean(weights)
    print(weights.shape, mean_weight)
    return mean_weight
示例13: _predict_proba
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def _predict_proba(self, X):
    """Sum the (optionally accuracy-weighted) votes of all ensemble members.

    A member contributes only when its ``predict_proba(X)`` output has a
    positive sum. Its first row is L1-normalised and, unless
    ``self.disable_weighted_vote`` is set, scaled by the member's
    accuracy when that accuracy is positive. The shorter of the running
    total and the new vote is grown with ``resize`` before the two are
    added.

    Returns:
        The accumulated 1-D vote array (``[0.]`` if nobody contributed).
    """
    # NOTE(review): the nesting below was reconstructed from a listing
    # that had lost its indentation - confirm against the upstream file.
    y_proba = np.asarray([0.])
    for learner in self.ensemble:
        votes = learner.predict_proba(X)
        if not np.sum(votes) > 0.0:
            continue

        votes = normalize(votes, norm='l1')[0].copy()
        acc = learner.performance_evaluator.accuracy_score()
        if not self.disable_weighted_vote and acc > 0.0:
            votes *= acc

        # Check array length consistency
        n_votes, n_total = len(votes), len(y_proba)
        if n_votes > n_total:
            y_proba.resize((n_votes, ), refcheck=False)
        elif n_votes < n_total:
            votes.resize((n_total, ), refcheck=False)

        # Add values
        y_proba += votes
    return y_proba
示例14: _update_embedding
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def _update_embedding(self, graph, original_embedding):
    r"""Performs the Network Embedding Update on the original embedding.

    Every iteration adds to the embedding its one-step and two-step
    propagation through the row-L1-normalised adjacency matrix, weighted
    by ``self.L1`` and ``self.L2`` respectively.

    Args:
        original_embedding (Numpy array): An array containing an embedding.
        graph (NetworkX graph): The embedded graph.

    Return types:
        embedding (Numpy array): An array containing the updated embedding.
    """
    embedding = self._normalize_embedding(original_embedding)

    node_order = range(graph.number_of_nodes())
    adjacency = nx.adjacency_matrix(graph, nodelist=node_order)
    walk = normalize(adjacency, norm='l1', axis=1)

    for _ in range(self.iterations):
        one_hop = walk @ embedding
        two_hop = walk @ (walk @ embedding)
        embedding = (embedding + self.L1*one_hop) + self.L2*two_hop
    return embedding
示例15: transform
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import normalize [as 别名]
def transform(self, X_si, high=None, low=None, limit=None):
    """
    Hash the analysed documents of ``X_si`` into a binary feature matrix.

    Each document goes through ``self._deal_with_input`` and the analyzer
    before hashing; every stored value of the hashed matrix is then set
    to 1. The matrix is normalised with ``self.norm`` unless that is
    ``None``, and passed to ``self._limit_features`` when ``low`` is
    truthy.

    NOTE(review): ``high`` and ``limit`` are accepted but never used in
    this method. The original docstring also mentioned an "interaction
    list" that adds empty rows; nothing in this method implements that -
    presumably it lives in ``_deal_with_input``, verify.
    """
    analyzer = self.build_analyzer()
    hasher = self._get_hasher()
    token_streams = (analyzer(self._deal_with_input(doc)) for doc in X_si)

    X = hasher.transform(token_streams)
    X.data.fill(1)

    if self.norm is not None:
        X = normalize(X, norm=self.norm, copy=False)
    if low:
        X = self._limit_features(X, low=low)
    return X