This article collects typical usage examples of the Python method sklearn.externals.six.moves.xrange. If you are wondering how moves.xrange works in practice, or looking for concrete examples of its use, the curated code samples below may help. You can also explore further usage examples from its containing module, sklearn.externals.six.moves.
Fifteen code examples of the moves.xrange method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
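For context: six.moves.xrange is a Python 2/3 compatibility alias. It resolves to the built-in xrange on Python 2 and to range on Python 3, so the examples below behave identically on both. Note that the vendored sklearn.externals.six copy was deprecated in scikit-learn 0.21 and removed in 0.23; in modern code, use the built-in range directly. A minimal check:

from sklearn.externals.six.moves import xrange  # xrange on Py2, range on Py3

list(xrange(3))  # [0, 1, 2] on both Python 2 and Python 3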
Example 1: _word_ngrams
# Required import: from sklearn.externals.six import moves [as alias]
# Or: from sklearn.externals.six.moves import xrange [as alias]
def _word_ngrams(self, tokens, stop_words=None):
    """Turn tokens into a sequence of n-grams after stop words filtering"""
    # handle stop words
    if stop_words is not None:
        tokens = [w for w in tokens if w not in stop_words]

    # handle token n-grams
    min_n, max_n = self.ngram_range
    if max_n != 1:
        original_tokens = tokens
        tokens = []
        n_original_tokens = len(original_tokens)
        for n in xrange(min_n,
                        min(max_n + 1, n_original_tokens + 1)):
            for i in xrange(n_original_tokens - n + 1):
                tokens.append(" ".join(original_tokens[i: i + n]))

    return tokens
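To see what this produces, here is a minimal self-contained sketch of the same loop (a hypothetical word_ngrams helper that takes ngram_range as an argument instead of reading it from self):

from sklearn.externals.six.moves import xrange

def word_ngrams(tokens, ngram_range=(1, 2)):
    # hypothetical standalone version of the method above
    min_n, max_n = ngram_range
    if max_n == 1:
        return list(tokens)
    out = []
    n_tokens = len(tokens)
    for n in xrange(min_n, min(max_n + 1, n_tokens + 1)):
        for i in xrange(n_tokens - n + 1):
            out.append(" ".join(tokens[i: i + n]))
    return out

word_ngrams(["to", "be", "or", "not"])
# ['to', 'be', 'or', 'not', 'to be', 'be or', 'or not']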
Example 2: _char_wb_ngrams
# Required import: from sklearn.externals.six import moves [as alias]
# Or: from sklearn.externals.six.moves import xrange [as alias]
def _char_wb_ngrams(self, text_document):
    """Whitespace sensitive char-n-gram tokenization.

    Tokenize text_document into a sequence of character n-grams
    excluding any whitespace (operating only inside word boundaries)"""
    # normalize white spaces
    text_document = self._white_spaces.sub(" ", text_document)

    min_n, max_n = self.ngram_range
    ngrams = []
    for w in text_document.split():
        w = ' ' + w + ' '
        w_len = len(w)
        for n in xrange(min_n, max_n + 1):
            offset = 0
            ngrams.append(w[offset:offset + n])
            while offset + n < w_len:
                offset += 1
                ngrams.append(w[offset:offset + n])
            if offset == 0:  # count a short word (w_len < n) only once
                break
    return ngrams
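This method backs CountVectorizer's analyzer='char_wb' mode, in which every n-gram stays inside a space-padded word. A quick sanity check of the padding behavior (the expected vocabulary is worked out by hand from the loop above):

from sklearn.feature_extraction.text import CountVectorizer

vec = CountVectorizer(analyzer='char_wb', ngram_range=(3, 3))
vec.fit(['jumpy fox'])
sorted(vec.vocabulary_)
# [' fo', ' ju', 'fox', 'jum', 'mpy', 'ox ', 'py ', 'ump']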
Example 3: update
# Required import: from sklearn.externals.six import moves [as alias]
# Or: from sklearn.externals.six.moves import xrange [as alias]
def update(self, event, model):
    if event == "examples_loaded":
        for i in xrange(len(model.data)):
            self.update_example(model, i)

    if event == "example_added":
        self.update_example(model, -1)

    if event == "clear":
        self.ax.clear()
        self.ax.set_xticks([])
        self.ax.set_yticks([])
        self.contours = []
        self.c_labels = None
        self.plot_kernels()

    if event == "surface":
        self.remove_surface()
        self.plot_support_vectors(model.clf.support_vectors_)
        self.plot_decision_surface(model.surface, model.surface_type)

    self.canvas.draw()
Example 4: test_int_float_dict
# Required import: from sklearn.externals.six import moves [as alias]
# Or: from sklearn.externals.six.moves import xrange [as alias]
def test_int_float_dict():
    rng = np.random.RandomState(0)
    keys = np.unique(rng.randint(100, size=10).astype(np.intp))
    values = rng.rand(len(keys))

    d = IntFloatDict(keys, values)
    for key, value in zip(keys, values):
        assert_equal(d[key], value)
    assert_equal(len(d), len(keys))

    d.append(120, 3.)
    assert_equal(d[120], 3.0)
    assert_equal(len(d), len(keys) + 1)
    for i in xrange(2000):
        d.append(i + 1000, 4.0)
    assert_equal(d[1100], 4.0)
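IntFloatDict is a private Cython helper mapping intp keys to float values, used internally by hierarchical clustering. A minimal usage sketch, assuming the import path from the scikit-learn version these examples target:

import numpy as np
from sklearn.utils.fast_dict import IntFloatDict  # private helper

keys = np.array([1, 5, 7], dtype=np.intp)
values = np.array([0.1, 0.5, 0.7])
d = IntFloatDict(keys, values)
d[5]              # 0.5
d.append(9, 0.9)  # the appends the xrange loop above exercises in bulk
len(d)            # 4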
Example 5: _char_ngrams
# Required import: from sklearn.externals.six import moves [as alias]
# Or: from sklearn.externals.six.moves import xrange [as alias]
def _char_ngrams(self, text_document):
    """Tokenize text_document into a sequence of character n-grams"""
    # normalize white spaces
    text_document = self._white_spaces.sub(" ", text_document)

    text_len = len(text_document)
    ngrams = []
    min_n, max_n = self.ngram_range
    for n in xrange(min_n, min(max_n + 1, text_len + 1)):
        for i in xrange(text_len - n + 1):
            ngrams.append(text_document[i: i + n])
    return ngrams
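This is the analyzer='char' counterpart of Example 2: it slides over the whole normalized document, so unlike _char_wb_ngrams its n-grams may span the space between words. A quick check (expected vocabulary worked out by hand):

from sklearn.feature_extraction.text import CountVectorizer

vec = CountVectorizer(analyzer='char', ngram_range=(3, 3))
vec.fit(['jumpy fox'])
sorted(vec.vocabulary_)
# [' fo', 'fox', 'jum', 'mpy', 'py ', 'ump', 'y f']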
Example 6: _validate_vocabulary
# Required import: from sklearn.externals.six import moves [as alias]
# Or: from sklearn.externals.six.moves import xrange [as alias]
def _validate_vocabulary(self):
    vocabulary = self.vocabulary
    if vocabulary is not None:
        if isinstance(vocabulary, set):
            vocabulary = sorted(vocabulary)
        if not isinstance(vocabulary, Mapping):
            vocab = {}
            for i, t in enumerate(vocabulary):
                if vocab.setdefault(t, i) != i:
                    msg = "Duplicate term in vocabulary: %r" % t
                    raise ValueError(msg)
            vocabulary = vocab
        else:
            indices = set(six.itervalues(vocabulary))
            if len(indices) != len(vocabulary):
                raise ValueError("Vocabulary contains repeated indices.")
            for i in xrange(len(vocabulary)):
                if i not in indices:
                    msg = ("Vocabulary of size %d doesn't contain index "
                           "%d." % (len(vocabulary), i))
                    raise ValueError(msg)
        if not vocabulary:
            raise ValueError("empty vocabulary passed to fit")
        self.fixed_vocabulary_ = True
        self.vocabulary_ = dict(vocabulary)
    else:
        self.fixed_vocabulary_ = False
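From the caller's side, this validation runs when a user-supplied vocabulary is passed to CountVectorizer. A short sketch of both the happy path and the index-gap error raised by the xrange loop above:

from sklearn.feature_extraction.text import CountVectorizer

vec = CountVectorizer(vocabulary=['apple', 'banana'])
vec.fit(['apple banana apple'])
vec.vocabulary_        # {'apple': 0, 'banana': 1}
vec.fixed_vocabulary_  # True

# a mapping whose indices leave a hole in 0..len-1 is rejected
CountVectorizer(vocabulary={'apple': 0, 'banana': 2}).fit(['apple'])
# ValueError: Vocabulary of size 2 doesn't contain index 1.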
Example 7: l2
# Required import: from sklearn.externals.six import moves [as alias]
# Or: from sklearn.externals.six.moves import xrange [as alias]
def l2(Ks, dim, X_rhos, Y_rhos, required, clamp=True, to_self=False):
    r'''
    Estimates the L2 distance between distributions, via
        \int (p - q)^2 = \int p^2 - \int p q - \int q p + \int q^2.

    \int p q and \int q p are estimated with the linear function (in both
    directions), while \int p^2 and \int q^2 are estimated via the quadratic
    function below.

    Always clamps negative estimates of l2^2 to 0, because otherwise the
    sqrt would break.
    '''
    n_X = len(X_rhos)
    n_Y = len(Y_rhos)

    linears = required
    assert linears.shape == (1, Ks.size, n_X, n_Y, 2)

    X_quadratics = np.empty((Ks.size, n_X), dtype=np.float32)
    for i, rho in enumerate(X_rhos):
        X_quadratics[:, i] = quadratic(Ks, dim, rho)

    Y_quadratics = np.empty((Ks.size, n_Y), dtype=np.float32)
    for j, rho in enumerate(Y_rhos):
        Y_quadratics[:, j] = quadratic(Ks, dim, rho)

    est = -linears.sum(axis=4)
    est += X_quadratics[None, :, :, None]
    est += Y_quadratics[None, :, None, :]
    np.maximum(est, 0, out=est)
    np.sqrt(est, out=est)

    # diagonal is of course known to be zero
    if to_self:
        est[:, :, xrange(n_X), xrange(n_Y)] = 0
    return est[:, :, :, :, None]
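The clamp-then-sqrt at the end is worth noting: because each integral is estimated separately, the estimate of l2^2 can come out slightly negative, and np.maximum floors it at 0 before the in-place sqrt. A tiny illustration of that pattern:

import numpy as np

est = np.array([0.5, -0.02, 1.3])  # slightly negative estimate of l2^2
np.maximum(est, 0, out=est)        # clamp in place
np.sqrt(est, out=est)              # sqrt is now safe
est  # array([0.70710678, 0.        , 1.14017543])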
Example 8: make_stacked
# Required import: from sklearn.externals.six import moves [as alias]
# Or: from sklearn.externals.six.moves import xrange [as alias]
def make_stacked(self):
    "If unstacked, convert to stacked. If stacked, do nothing."
    if self.stacked:
        return

    self._boundaries = bounds = np.r_[0, np.cumsum(self.n_pts)]
    self.stacked_features = stacked = np.vstack(self.features)
    self.features = np.array(
        [stacked[bounds[i-1]:bounds[i]] for i in xrange(1, len(bounds))],
        dtype=object)
    self.stacked = True

############################################################################
## Properties to get at basic metadata
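The bookkeeping here is simple: np.cumsum over the per-bag point counts gives slice boundaries into the stacked array. A minimal sketch with three hypothetical bags:

import numpy as np

n_pts = [3, 2, 4]                    # points per bag
bounds = np.r_[0, np.cumsum(n_pts)]  # array([0, 3, 5, 9])
# bag i is stacked[bounds[i]:bounds[i+1]], matching the list
# comprehension inside make_stacked above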
Example 9: test_mean
# Required import: from sklearn.externals.six import moves [as alias]
# Or: from sklearn.externals.six.moves import xrange [as alias]
def test_mean():
    dim = 5
    n_bags = 50
    np.random.seed(42)
    bags = [np.random.randn(np.random.randint(30, 100), dim)
            for _ in xrange(n_bags)]

    meaned = BagMean().fit_transform(bags)
    assert meaned.shape == (n_bags, dim)
    assert np.allclose(meaned[3], np.mean(bags[3], axis=0))
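BagMean collapses each bag to the mean of its rows, as the final assertion checks. A plain-numpy equivalent, for comparison:

import numpy as np

bags = [np.random.randn(10, 5), np.random.randn(20, 5)]
meaned = np.vstack([b.mean(axis=0) for b in bags])
meaned.shape  # (2, 5): one row per bag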
Example 10: test_bagofwords_basic
# Required import: from sklearn.externals.six import moves [as alias]
# Or: from sklearn.externals.six.moves import xrange [as alias]
def test_bagofwords_basic():
    n_codewords = 10
    dim = 5

    kmeans = KMeans(n_clusters=n_codewords, max_iter=100, n_init=3,
                    random_state=47)
    bow = BagOfWords(kmeans)

    np.random.seed(42)
    bags = [np.random.randn(np.random.randint(30, 100), dim)
            for _ in xrange(50)]

    bowed = bow.fit_transform(bags)
    assert bowed.shape == (len(bags), n_codewords)
    assert bow.codewords_.shape == (n_codewords, dim)
    assert np.all(bowed >= 0)
    assert np.all(np.sum(bowed, 1) == [b.shape[0] for b in bags])

    bow.fit(Features(bags))
    bowed2 = bow.transform(bags)
    assert np.all(bowed == bowed2)
    assert bow.codewords_.shape == (n_codewords, dim)

    minikmeans = MiniBatchKMeans(n_clusters=n_codewords, max_iter=100,
                                 random_state=47)
    minibow = BagOfWords(minikmeans)
    assert_raises(AttributeError, lambda: minibow.transform(bags))

    minibowed = minibow.fit_transform(bags)
    assert minibowed.shape == bowed.shape
    assert np.all(bowed >= 0)
    assert np.all(np.sum(bowed, 1) == [b.shape[0] for b in bags])
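The row-sum assertions pin down what BagOfWords computes: k-means codewords are fit on all points pooled together, then each bag becomes a histogram of its points' cluster assignments. A plain-sklearn sketch of the same idea (reusing bags from the test above):

import numpy as np
from sklearn.cluster import KMeans

kmeans = KMeans(n_clusters=10, random_state=47).fit(np.vstack(bags))
hist = np.bincount(kmeans.predict(bags[0]), minlength=10)
assert hist.sum() == len(bags[0])  # row sums equal bag sizes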
Example 11: test_l2density_basic
# Required import: from sklearn.externals.six import moves [as alias]
# Or: from sklearn.externals.six.moves import xrange [as alias]
def test_l2density_basic():
    dim = 3
    bags = [np.random.randn(np.random.randint(30, 100), dim)
            for _ in xrange(50)]
    pipe = Pipeline([
        ('scale', BagMinMaxScaler([0, 1])),
        ('density', L2DensityTransformer(15)),
    ])
    l2ed = pipe.fit_transform(bags)
    assert np.all(np.isfinite(l2ed))

    # ||x - y||^2 = <x, x> - 2 <x, y> + <y, y>
    K = l2ed.dot(l2ed.T)
    row_norms_sq = np.diagonal(K)
    l2_dist_sq = row_norms_sq[:, None] - 2 * K + row_norms_sq[None, :]
    assert np.min(row_norms_sq) > 0
    assert np.min(l2_dist_sq) >= 0

    assert_raises(ValueError, lambda: L2DensityTransformer(10, basis='foo'))

    t = L2DensityTransformer(10)
    assert_raises(AttributeError, lambda: t.transform(bags))
    t.fit(dim)
    t.transform(BagMinMaxScaler([0, 1]).fit_transform(bags))
    assert_raises(ValueError, lambda: t.transform([b[:, :2] for b in bags]))
    assert_raises(ValueError, lambda: t.transform(bags))
    t.basis = 'haha snuck my way in'
    assert_raises(ValueError, lambda: t.transform(bags))

################################################################################
Example 12: test_pca
# Required import: from sklearn.externals.six import moves [as alias]
# Or: from sklearn.externals.six.moves import xrange [as alias]
def test_pca():
    bags = [np.random.normal(5, 3, size=(np.random.randint(10, 100), 20))
            for _ in xrange(50)]
    feats = Features(bags, stack=True)

    pca = BagPCA(k=3)
    pca.fit(bags)
    pcaed = pca.transform(bags)
    assert pcaed.dim == 3

    BagPCA(varfrac=.3).fit_transform(bags)

    pca2 = BagPCA(k=20)
    pcaed2 = pca2.fit_transform(bags)
    orig = pca2.inverse_transform(pcaed2)
    orig.make_stacked()
    assert np.allclose(feats.stacked_features, orig.stacked_features)

    assert BagPCA(k=5, randomize=True).fit_transform(bags).dim == 5

    assert_raises(TypeError, lambda: BagPCA(randomize=True))
    assert_raises(TypeError, lambda: BagPCA(mle_components=True, k=12))
    assert BagPCA(mle_components=True)

################################################################################
Example 13: test_knn_version_consistency
# Required import: from sklearn.externals.six import moves [as alias]
# Or: from sklearn.externals.six.moves import xrange [as alias]
def test_knn_version_consistency():
    if not have_flann:
        raise SkipTest("No flann, so skipping knn tests.")
    if not have_accel:
        raise SkipTest("No skl-groups-accel, so skipping version consistency.")

    n = 20
    for dim in [1, 7]:
        np.random.seed(47)
        bags = Features([np.random.randn(np.random.randint(30, 100), dim)
                         for _ in xrange(n)])

        div_funcs = ('kl', 'js', 'renyi:.9', 'l2', 'tsallis:.8')
        Ks = (3, 4)
        get_est = partial(KNNDivergenceEstimator, div_funcs=div_funcs, Ks=Ks)
        results = {}
        for version in ('fast', 'slow', 'best'):
            est = get_est(version=version)
            results[version] = res = est.fit_transform(bags)
            assert res.shape == (len(div_funcs), len(Ks), n, n)
            assert np.all(np.isfinite(res))

        for df, fast, slow in zip(div_funcs, results['fast'], results['slow']):
            assert_array_almost_equal(
                fast, slow, decimal=1 if df == 'js' else 5,
                err_msg="({}, dim {})".format(df, dim))
            # TODO: debug JS differences

        est = get_est(version='fast', n_jobs=-1)
        res = est.fit_transform(bags)
        assert np.all(results['fast'] == res)

        est = get_est(version='slow', n_jobs=-1)
        res = est.fit_transform(bags)
        assert np.all(results['slow'] == res)
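For reference, the estimator exercised here comes from the skl-groups package; a minimal usage sketch (assuming the top-level import path, with bags as a Features object or list of point arrays):

from skl_groups.divergences import KNNDivergenceEstimator

est = KNNDivergenceEstimator(div_funcs=('kl', 'js'), Ks=(3, 4))
D = est.fit_transform(bags)
D.shape  # (n_div_funcs, n_Ks, n_bags, n_bags), as asserted above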
Example 14: test_knn_sanity_slow
# Required import: from sklearn.externals.six import moves [as alias]
# Or: from sklearn.externals.six.moves import xrange [as alias]
def test_knn_sanity_slow():
    if not have_flann:
        raise SkipTest("No flann, so skipping knn tests.")

    dim = 3
    n = 20
    np.random.seed(47)
    bags = Features([np.random.randn(np.random.randint(30, 100), dim)
                     for _ in xrange(n)])

    # just make sure it runs
    div_funcs = ('kl', 'js', 'renyi:.9', 'l2', 'tsallis:.8')
    Ks = (3, 4)
    est = KNNDivergenceEstimator(div_funcs=div_funcs, Ks=Ks)
    res = est.fit_transform(bags)
    assert res.shape == (len(div_funcs), len(Ks), n, n)
    assert np.all(np.isfinite(res))

    # test that JS blows up when there's a huge difference in bag sizes
    # (so that K is too low)
    assert_raises(
        ValueError,
        partial(est.fit_transform, bags + [np.random.randn(1000, dim)]))

    # test fit() and then transform() with JS, with different-sized test bags
    est = KNNDivergenceEstimator(div_funcs=('js',), Ks=(5,))
    est.fit(bags, get_rhos=True)
    with LogCapture('skl_groups.divergences.knn', level=logging.WARNING) as l:
        res = est.transform([np.random.randn(300, dim)])
        assert res.shape == (1, 1, 1, len(bags))
        assert len(l.records) == 1
        assert l.records[0].message.startswith('Y_rhos had a lower max_K')

    # test that passing a div func more than once raises
    def blah(df):
        est = KNNDivergenceEstimator(div_funcs=[df, df])
        return est.fit(bags)
    assert_raises(ValueError, lambda: blah('kl'))
    assert_raises(ValueError, lambda: blah('renyi:.8'))
    assert_raises(ValueError, lambda: blah('l2'))
Example 15: test_knn_memory
# Required import: from sklearn.externals.six import moves [as alias]
# Or: from sklearn.externals.six.moves import xrange [as alias]
def test_knn_memory():
    if not have_flann:
        raise SkipTest("No flann, so skipping knn tests.")

    dim = 3
    n = 20
    np.random.seed(47)
    bags = Features([np.random.randn(np.random.randint(30, 100), dim)
                     for _ in xrange(n)])
    tdir = tempfile.mkdtemp()

    div_funcs = ('kl', 'js', 'renyi:.9', 'l2', 'tsallis:.8')
    Ks = (3, 4)
    est = KNNDivergenceEstimator(div_funcs=div_funcs, Ks=Ks, memory=tdir)
    res1 = est.fit_transform(bags)

    with LogCapture('skl_groups.divergences.knn', level=logging.INFO) as l:
        res2 = est.transform(bags)
    assert len(l.records) == 0
    assert np.all(res1 == res2)

    with LogCapture('skl_groups.divergences.knn', level=logging.INFO) as l:
        res3 = est.fit_transform(bags)
    for r in l.records:
        assert not r.message.startswith("Getting divergences")
    assert np.all(res1 == res3)