This article collects typical usage examples of the numpy.argpartition function in Python. If you are wondering what argpartition does, how to call it, or what real-world uses look like, the hand-picked code examples below may help.
The following shows 15 code examples of the argpartition function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
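Before the examples, a quick sketch of the core pattern nearly all of them share: np.argpartition runs in linear time and only guarantees that the kth position lands in its sorted place, so slicing off the last k (or first k) indices gives the k largest (or smallest) elements in no particular order. The array and k below are made-up illustrative values, not taken from any example.

import numpy as np

scores = np.array([0.3, 0.9, 0.1, 0.7, 0.5])
k = 2
# Indices of the k largest values (unordered within the top k)
top_k = np.argpartition(scores, -k)[-k:]
# Indices of the k smallest values (unordered within the bottom k)
bottom_k = np.argpartition(scores, k)[:k]
print(scores[top_k])     # the two largest values, in arbitrary order
print(scores[bottom_k])  # the two smallest values, in arbitrary order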
Example 1: predict_variance_inf_phase1
def predict_variance_inf_phase1(budget, hum_train_means, temp_train_means, hum_train_vars, temp_train_vars):
"""Method to make predictions based on max-variance active inference."""
start_hum = 0
window_hum = None
window_temp = None
i = 0
hum_preds = np.ones((50, 96))
temp_preds = np.ones((50, 96))
for t in global_times:
if budget > 0:
window_hum = np.argpartition(hum_train_vars[t], -budget)[-budget:]
window_temp = np.argpartition(temp_train_vars[t], -budget)[-budget:]
else:
window_hum = np.array([])
window_temp = np.array([])
hum_pred, temp_pred = makePreds_phase1(window_hum, window_temp, hum_train_means, temp_train_means, i, t)
hum_preds[:, i] = copy.deepcopy(hum_pred)
temp_preds[:, i] = copy.deepcopy(temp_pred)
i += 1
hum_mean_err = mean_absolute_error(hum_test, hum_preds)
temp_mean_err = mean_absolute_error(temp_test, temp_preds)
return hum_preds, temp_preds, hum_mean_err, temp_mean_err
Example 2: doKNN
def doKNN(k):
dm = cdist(teXf, trXf,'euclidean')
cfm = np.zeros((10,10), dtype = int)
for a in range(0,len(dm)):
knn = np.argpartition(dm[a], k)[:k]
preds = trY[knn]
counts = np.bincount(preds)
pred = -1
if len(counts)>=2:
top2 = np.argpartition(-counts, 1)
if counts[top2[0]] == counts[top2[1]]:
d = 99999
                for i in range(0, len(knn)):
                    val = dm[a][knn[i]]  # distance to the i-th nearest neighbor, not to column i
                    if val < d:
                        d = val
                        pred = trY[knn[i]]
else:
pred = top2[0]
else:
pred = 0
#print pred
#mnist.visualize(teX[a])
cfm[teY[a]][pred] += 1
#print cfm
#print "ER: ", 1 - np.sum(np.diagonal(cfm))/np.sum(cfm)
return cfm
Example 3: precision_test_function
def precision_test_function(theano_inputs):
k = 10
scores1, scores2, c_select, n_used_items = theano_test_function(*theano_inputs)
ids1 = np.argpartition(-scores1, range(k), axis=-1)[0, :k]
ids2 = np.argpartition(-scores2, range(k), axis=-1)[0, :k]
return ids1, ids2, c_select, n_used_items
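Example 3 (and several of the later examples) passes a sequence of kth values rather than a single integer: np.argpartition then places every one of those positions in its sorted place, so the leading k indices come back already ranked instead of merely being the correct set. A small self-contained sketch with made-up data:

import numpy as np

scores = np.array([0.2, 0.9, 0.4, 0.8, 0.1, 0.7])
k = 3
# Single kth value: the first k indices are the top k, but in arbitrary order
unordered = np.argpartition(-scores, k - 1)[:k]
# Sequence of kth values: positions 0..k-1 are each in sorted place
ordered = np.argpartition(-scores, range(k))[:k]
print(scores[unordered])  # {0.9, 0.8, 0.7} in some order
print(scores[ordered])    # [0.9 0.8 0.7]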
Example 4: similarity_matrix
def similarity_matrix(self):
""" Calculate the similarity matrix given all samples used for GTM map training
:return: similarity_matrix: Matrix assessing the similarity between samples used for GTM map training
"""
print "Calculating similarity matrix..."
# Find one tenth of the highest and lowest probability distribution values for each sample in the latent space
sim_size = int(round(self.latent_space_size/10))
responsibility_indexes = np.zeros((sim_size * 2, self.input_data.shape[0]))
corr_input = np.zeros((sim_size * 2, self.input_data.shape[0]))
    for i in range(0, self.input_data.shape[0]):
responsibility_indexes[0:sim_size, i] = np.argpartition(self.gtm_responsibility[:, i],
-sim_size)[-sim_size:]
responsibility_indexes[sim_size:, i] = np.argpartition(self.gtm_responsibility[:, i], sim_size)[0:sim_size]
responsibility_indexes = responsibility_indexes.astype(int)
# Create correlation input matrix for similarity assessment
    for i in range(0, self.input_data.shape[0]):
corr_input[:, i] = self.gtm_responsibility[responsibility_indexes[:, i], i]
# Calculate correlation between all samples and build similarity matrix
similarity_matrix = np.corrcoef(np.transpose(corr_input))
# Plot heat map of the similarity matrix accordingly
[x, y] = np.meshgrid(np.linspace(1, self.input_data.shape[0], self.input_data.shape[0]),
np.linspace(1, self.input_data.shape[0], self.input_data.shape[0]))
x = np.ravel(x)
y = np.ravel(y)
sim_lat = np.array([x, y])
print "Plotting color mesh image..."
plt.pcolormesh(np.reshape(sim_lat[0, :], (self.input_data.shape[0], self.input_data.shape[0])),
np.reshape(sim_lat[1, :], (self.input_data.shape[0], self.input_data.shape[0])), similarity_matrix,
cmap='magma', vmin=0, vmax=1)
plt.colorbar()
plt.axis([x.min(), x.max(), y.min(), y.max()])
plt.gca().invert_yaxis()
return similarity_matrix
Example 5: local_kmeans_class
def local_kmeans_class(I, L, x, k):
from scipy.spatial.distance import cdist
sizex = len(np.atleast_2d(x))
label = np.zeros((sizex,k))
for rowsx in range(0, sizex):
tic()
dists = cdist(I, np.atleast_2d(x[rowsx]), metric='euclidean')
toc()
center = np.zeros((10,k,28*28))
label_order = np.unique(L)
l=0
tic()
thing = np.zeros((k,28*28))
for labs in np.unique(L):
indices = L == labs
k_smallest = np.argpartition(dists[indices],tuple(range(1,k)),axis=None)
for i in range(0,k):
M = I[indices]
#center[l,i,:] = np.average(M[k_smallest[:i+1]],axis = 0)
if i == 0:
thing[i] = M[k_smallest[i+1]]
else:
thing[i] = thing[i-1] + M[k_smallest[i+1]]
            center[l,:,:] = thing / np.arange(1, k+1).reshape(k, 1)  # running means of the 1..k nearest points (the original hard-coded 10 in place of k)
l+=1
toc()
for i in range(k):
#print(cdist(center[:,i,:], np.atleast_2d(x[rowsx]), metric='euclidean'))
dists2center = cdist(center[:,i,:], np.atleast_2d(x[rowsx]), metric='euclidean')
k_smallest = np.argpartition(dists2center,tuple(range(1)),axis=None)
label[rowsx,i] = label_order[k_smallest[0]]
return label
Example 6: branch_to_nodes
def branch_to_nodes(self, wt, completion):
"""
Decide which nodes to branch to next
"""
missing_edges = HGT.get_missing_edges(completion) # Obtain the missing edge sparse list
nb = self.strat.node_brancher
# Determine if there is a maximum count
count_max = min(self.strat.max_node_branch, self.num_nodes)
if nb is None or not 'name' in nb: # Default
# Gets nodes that contribute to missing edge
edge = missing_edges.indices[0] # Grab any next edge
node_indices = self.H[:, edge].indices
elif nb['name'] == 'greedy' or nb['name'] == 'long':
# Gets the nodes that overlap the most(least) with what's missing
overlap = self.H.dot(missing_edges.T)
# k = min(count_max + wt.nnz, overlap.nnz)
k = min(count_max, overlap.nnz)
if k >= self.num_nodes or k == overlap.nnz:
if nb['name'] == 'greedy':
alg_slice = np.argsort(overlap.data)[::-1]
else: # long
alg_slice = np.argsort(overlap.data)
else: # Else be smart, don't perform O(nlogn) operations, perform O(k) operations
if nb['name'] == 'greedy':
alg_slice = np.argpartition(overlap.data, -k)[-k:]
else: #long
alg_slice = np.argpartition(overlap.data, k)[:k]
node_indices = overlap.indices[alg_slice]
elif nb['name'] == 'random':
# Gets nodes that contribute to random missing edge
edge = np.random.choice(missing_edges.indices) # Grab any next edge
node_indices = self.H[:, edge].indices
elif nb['name'] == 'diverse':
# Diversify the kinds of transversals that have been found
if wt.nnz == 0: # Just starting out
node_indices = np.arange(self.num_nodes) # Branch to everything
else: # Otherwise be greedy up to one
# edge = missing_edges.indices[0] # Grab any next edge
# node_indices = [self.H[:, edge].indices[0]]
            overlap = self.H.dot(missing_edges.T)  # needed by the scaled_overlap computation below
# node_indices = [overlap.indices[np.argmax(overlap.data)]]
scaled_overlap = overlap.data / (self.node_weights[overlap.indices]**2)
node_indices = overlap.indices[np.where(np.max(scaled_overlap) == scaled_overlap)]
else:
raise ValueError("Invalid strat.node_brancher: {0}".format(self.strat.node_brancher))
if nb is not None and bool(nb.get('shuffle', False)):
np.random.shuffle(node_indices)
count = 0
for i in node_indices:
if count >= count_max:
break
if not wt[i, 0] > 0: # not already part of working transversal
self.log('Branching to node:', i)
count += 1
yield i
Example 7: construct_initial_solution
def construct_initial_solution(self):
ind = np.argpartition(self.collaboration_coo.data, -len(self.villains_team))[-len(self.villains_team):]
inc = 1
while len(np.unique(self.collaboration_coo.row[ind])) < len(self.villains_team):
ind = np.argpartition(self.collaboration_coo.data, -(len(self.villains_team) + inc))[-(len(self.villains_team) + inc):]
inc += 1
heroes_team = self.heroes.loc[self.heroes[CHARACTER_ID].isin(self.collaboration_coo.row[ind])]
return heroes_team
Example 8: _get_k_max_elements_indices_and_scores
def _get_k_max_elements_indices_and_scores(vec, k, mask=None):
if mask is None:
# We use argpartition here instead of argsort to achieve linear-time performance.
max_elements_indices = np.argpartition(-vec, k - 1)[:k]
else:
masked_vec = vec.copy() # To avoid side-effects
masked_vec[~mask] = -np.inf
max_elements_indices = np.argpartition(-masked_vec, k - 1)[:k]
return max_elements_indices, vec[max_elements_indices]
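Example 8 combines masking with top-k selection: setting disallowed entries to -inf before partitioning keeps the selection linear-time while guaranteeing they can never appear among the k winners (provided k does not exceed the number of unmasked entries). A stand-alone sketch of the same idea, with invented names and data:

import numpy as np

def top_k_indices(vec, k, mask=None):
    """Indices of the k largest entries, optionally restricted to positions where mask is True."""
    if mask is not None:
        vec = vec.copy()      # avoid mutating the caller's array
        vec[~mask] = -np.inf  # masked entries can never be selected
    return np.argpartition(-vec, k - 1)[:k]

scores = np.array([3.0, 9.0, 4.0, 8.0, 1.0])
allowed = np.array([True, False, True, True, True])
print(top_k_indices(scores, 2, allowed))  # indices 3 and 2, order not guaranteed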
Example 9: similarityPlot
def similarityPlot():
import matplotlib.pyplot as plt
from matplotlib import rcParams
tfidf_vectorizer = TfidfVectorizer(min_df=1)
names = friendsAboveMinNumMessages(200) + [me]
data = []
words = [] #ordering of words in tf_idf matrix
wordsSet = set() #for faster lookup
nameSet = set()
for person in personDict:
for name in person.split():
nameSet.add(name)
nameSet.add(name.lower())
for i in range(len(names)):
data.append(getAllMessagesAsString(names[i], False))
tfidf_matrix = tfidf_vectorizer.fit_transform(data)
featureNames = tfidf_vectorizer.get_feature_names()
tfidf_arr = tfidf_matrix.toarray()
    for j in range(len(tfidf_arr[0])):
        word = featureNames[j]  # the j-th column of the tf-idf matrix corresponds to the j-th feature name
        if word not in wordsSet:
            words.append(word)
            wordsSet.add(word)
#nmds = manifold.MDS(metric = True, n_components = N_DISTINGUISHING_FEATURES)
#npos = nmds.fit_transform(tfidf_matrix.toarray())
clf = PCA(n_components=2)
npos = clf.fit_transform(tfidf_arr)
plt.scatter(npos[:, 0], npos[:, 1], marker = 'o', c = 'b', cmap = plt.get_cmap('Spectral')) #change colors
for name, x, y in zip(names, npos[:, 0], npos[:, 1]):
plt.annotate(
name,
xy = (x, y), xytext = (-20, 20),
textcoords = 'offset points', ha = 'right', va = 'bottom',
bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5),
arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))
fig, ax = plt.subplots()
ax2 = ax.twinx()
xAxisP = [featureNames[i] for i in np.argpartition(clf.components_[0], -50)[-50:] if featureNames[i] not in nameSet]
yAxisP = [featureNames[i] for i in np.argpartition(clf.components_[1], -50)[-50:] if featureNames[i] not in nameSet]
xAxisN = [featureNames[i] for i in np.argpartition(-clf.components_[0], -50)[-50:] if featureNames[i] not in nameSet]
yAxisN = [featureNames[i] for i in np.argpartition(-clf.components_[1], -50)[-50:] if featureNames[i] not in nameSet]
ax.set_xlabel("Most Postively influential words along x axis:\n" + ", ".join(xAxisP), fontsize=18)
ax.set_ylabel("Most Postively influential words along y axis:\n" + ", ".join(yAxisP), fontsize=18)
ax2.set_xlabel("Most Negatively influential words along x axis:\n" + ", ".join(xAxisN), fontsize=18)
ax2.set_ylabel("Most Negatively influential words along y axis:\n" + ", ".join(yAxisN), fontsize=18)
# xAxis = [featureNames[i] for i in np.argpartition(np.absolute(clf.components_[0]), -50)[-50:] if featureNames[i] not in nameSet]
# yAxis = [featureNames[i] for i in np.argpartition(np.absolute(clf.components_[1]), -50)[-50:] if featureNames[i] not in nameSet]
# for i in range(1, max(len(xAxis), len(yAxis)) ):
# if i % 20 == 0 and i < len(xAxis):
# xAxis[i] += "\n"
# if i % 15 == 0 and i < len(yAxis):
# yAxis[i] += "\n"
# plt.xlabel("Most influential words along x axis:\n" + ", ".join(xAxis), fontsize=18)
# plt.ylabel("Most influential words along y axis:\n" + ", ".join(yAxis), fontsize=18)
rcParams.update({'figure.autolayout': True})
plt.suptitle("Word-Usage Similarity Scatterplot", fontsize = 24, fontweight = 'bold')
plt.show()
Example 10: _phase2
def _phase2(self):
"""
Execute phase 2 of the SP region. This phase is used to compute the
active columns.
Note - This should only be called after phase 1 has been called and
after the inhibition radius and neighborhood have been updated.
"""
# Shift the outputs
self.y[:, 1:] = self.y[:, :-1]
self.y[:, 0] = 0
# Calculate k
# - For a column to be active its overlap must be at least as large
# as the overlap of the k-th largest column in its neighborhood.
k = self._get_num_cols()
if self.global_inhibition:
# The neighborhood is all columns, thus the set of active columns
# is simply columns that have an overlap >= the k-th largest in the
# entire region
# Compute the winning column indexes
if self.learn:
# Randomly break ties
ix = np.argpartition(-self.overlap[:, 0] -
self.prng.uniform(.1, .2, self.ncolumns), k - 1)[:k]
else:
# Choose the same set of columns each time
ix = np.argpartition(-self.overlap[:, 0], k - 1)[:k]
# Set the active columns
self.y[ix, 0] = self.overlap[ix, 0] > 0
else:
# The neighborhood is bounded by the inhibition radius, therefore
# each column's neighborhood must be considered
        for i in range(self.ncolumns):
# Get the neighbors
ix = np.where(self.neighbors[i])[0]
# Compute the minimum top overlap
if ix.shape[0] <= k:
# Desired number of candidates is at or below the desired
# activity level, so find the overall min
m = max(bn.nanmin(self.overlap[ix, 0]), 1)
else:
# Desired number of candidates is above the desired
# activity level, so find the k-th largest
m = max(-np.partition(-self.overlap[ix, 0], k - 1)[k - 1],
1)
# Set the column activity
if self.overlap[i, 0] >= m: self.y[i, 0] = True
Example 11: _build_recursive
def _build_recursive(indices, level=0, split_index=0):
"""
Descend recursively into tree to build it, setting splits and
returning indices for leaves
:param indices: The current set of indices before partitioning
:param level: The level in the tree
:param split_index: The index of the split to set
:return: A list of arrays representing leaf membership
:rtype: list[np.ndarray]
"""
# If we're at the bottom, no split, just return the set
if level == self._depth:
return [indices]
n = indices.size
# If we literally don't have enough to populate the leaf, make it
# empty
if n < 1:
return []
# Get the random projections for these indices at this level
# NB: Recall that the projection matrix has shape (levels, N)
level_proj = proj[indices, level]
# Split at the median if even, put median in upper half if not
n_split = n // 2
if n % 2 == 0:
part_indices = np.argpartition(
level_proj, (n_split - 1, n_split))
split_val = level_proj[part_indices[n_split - 1]]
split_val += level_proj[part_indices[n_split]]
split_val /= 2.0
else:
part_indices = np.argpartition(level_proj, n_split)
split_val = level_proj[part_indices[n_split]]
splits[split_index] = split_val
# part_indices is relative to this block of values, recover
# main indices
left_indices = indices[part_indices[:n_split]]
right_indices = indices[part_indices[n_split:]]
# Descend into each split and get sub-splits
left_out = _build_recursive(left_indices, level=level + 1,
split_index=2 * split_index + 1)
right_out = _build_recursive(right_indices, level=level + 1,
split_index=2 * split_index + 2)
# Assemble index set
left_out.extend(right_out)
return left_out
Example 12: fitOneLinearRegression
def fitOneLinearRegression(thetaLinear, IntensityLinear, tiltanglesArray, options):
if (len(tiltanglesArray)%2 == 1):
halfN = int(len(tiltanglesArray)/2) + 1
xLeft, yLeft = thetaLinear[0:halfN], IntensityLinear[0:halfN]
xRight, yRight = thetaLinear[halfN-1:], IntensityLinear[halfN-1:]
else:
halfN = int(len(tiltanglesArray)/2)
xLeft, yLeft = thetaLinear[0:halfN], IntensityLinear[0:halfN]
xRight, yRight = thetaLinear[halfN:], IntensityLinear[halfN:]
slopeLeft, interceptLeft, r2Left = linearRegression(xLeft, yLeft)
slopeRight, interceptRight, r2Right = linearRegression(xRight, yRight)
assert(len(xLeft)==len(xRight))
fitLeft = slopeLeft*xLeft + interceptLeft
fitRight = slopeRight*xRight + interceptRight
#the sum of squared residuals
resLeft = yLeft - fitLeft
resLeft = resLeft / fitLeft
#print "resLeft", resLeft
resRight = yRight - fitRight
resRight = resRight / fitRight
#print "resRight", resRight
fresLeft = sum(resLeft**2)
fresRight = sum(resRight**2)
fres = [fresLeft*1000000, fresRight*1000000]
#find the points with the largest 3 residuals in left and right branches, use numpy.argpartition
#N = options.largestNRes
N=3
negN = (-1)*N
indexLargeLeft = np.argpartition(resLeft**2, negN)[negN:]
indexLargeRight = np.argpartition(resRight**2, negN)[negN:]
M=3
#M = options.smallestNRes
posM = M
indexSmallLeft = np.argpartition(resLeft**2, posM)[:posM]
indexSmallRight = np.argpartition(resRight**2, posM)[:posM]
#MSE, under the assumption that the population error term has a constant variance, the estimate of that variance is given by MSE, mean square error
#The denominator is the sample size reduced by the number of model parameters estimated from the same data, (n-p) for p regressors or (n-p-1) if an intercept is used.
#In this case, p=1 so the denominator is n-2.
stdResLeft = np.std(resLeft, ddof=2)
stdResRight = np.std(resRight, ddof=2)
stdRes = [stdResLeft*1000, stdResRight*1000]
ret = fres, stdRes, xLeft, yLeft, fitLeft, xRight, yRight, fitRight, indexLargeLeft, indexLargeRight, indexSmallLeft, indexSmallRight, resLeft, resRight, slopeLeft, interceptLeft, slopeRight, interceptRight
return ret
Example 13: define_toplogy
def define_toplogy(self, num_input, num_hidden, num_output, density):
"""
Defines the topology of the OpenBrain network.
:param num_input:
:param num_hidden:
:param num_output:
:param density:
:return:
"""
topo = networkx.DiGraph(networkx.watts_strogatz_graph(self.num_neurons, 5, density, seed=None)).to_directed()
adjacency_list = topo.adjacency_list()
# Pick the output neurons to be those with highest in degree
in_deg = np.array([topo.in_degree(x) for x,_ in enumerate(adjacency_list)])
self.output_neurons = np.argpartition(in_deg, -num_output)[-num_output:]
print(self.output_neurons)
print([topo.in_degree(x) for x in self.output_neurons])
# Pick the input neurons to be those with highest out degree
out_deg = np.array([topo.out_degree(x) if x not in self.output_neurons else -1
for x,_ in enumerate(adjacency_list)])
self.input_neurons = np.argpartition(out_deg, -num_input)[-num_input:]
# Output neurons do not fire out.
for adjacent_neurons in adjacency_list:
for out_neuron in self.output_neurons:
if out_neuron in adjacent_neurons:
adjacent_neurons.remove(out_neuron)
# Disconnect input -> output
for out in self.output_neurons:
for inp in self.input_neurons:
if out in adjacency_list[inp]: adjacency_list[inp].remove(out)
if inp in adjacency_list[out]: adjacency_list[out].remove(inp)
for i, adjacent in enumerate(adjacency_list):
if i not in self.input_neurons and i not in self.output_neurons:
for n in adjacent:
if i in adjacency_list[n]:
if np.random.rand(1)>0.5:
adjacent.remove(n)
else:
adjacency_list[n].remove(i)
# Let nothing enter the input neurons
for inp in self.input_neurons:
adjacency_list[inp] = []
return adjacency_list
Example 14: sort_by_relative_entropy
def sort_by_relative_entropy(corpus, topicct, stemmer):
# get the right file names for the corpus and count
stemmed_weights = ['wordweights/' + fname for fname in os.listdir('wordweights')
if fname.startswith('{}-{}-{}'.format(corpus, stemmer, topicct))]
unstemmed_weights = ['wordweights/' + fname for fname in os.listdir('wordweights')
if fname.startswith('{}-{}-{}'.format(corpus, UNSTEMMED_NAME, topicct))]
stemmed_corpus_file = 'corpora/{}-train-{}-stopped.txt'.format(corpus, stemmer)
unstemmed_corpus_file = 'corpora/{}-train-{}-stopped.txt'.format(corpus, UNSTEMMED_NAME)
# get the mapping from unstemmed to stemmed words
stemmed_to_unstemmed = defaultdict(set)
unstemmed_counts = Counter()
with open(stemmed_corpus_file) as f, open(unstemmed_corpus_file) as g:
for stemmed_line in f:
stemmed_words = stemmed_line.split()[3:]
unstemmed_words = g.readline().split()[3:]
assert(len(stemmed_words) == len(unstemmed_words))
for uword, sword in zip(unstemmed_words, stemmed_words):
stemmed_to_unstemmed[sword].add(uword)
unstemmed_counts[uword] += 1
# for each file; for each word; get the entropy
stemmed_entropies = defaultdict(list)
unstemmed_entropies = defaultdict(list)
for file in stemmed_weights:
entropy_dict = get_stemmed_entropy_per_word(file)
        for k, v in entropy_dict.items():
stemmed_entropies[k].append(v)
for file in unstemmed_weights:
entropy_dict = get_unstemmed_entropy_per_word(file, stemmed_to_unstemmed, int(topicct))
        for k, v in entropy_dict.items():
unstemmed_entropies[k].append(v)
# compute difference of average entropies
    stemmed_vocab = [sword for sword, uwords in stemmed_to_unstemmed.items() if len(uwords) > 1]
entropy_diffs = np.zeros(len(stemmed_vocab))
for i, sword in enumerate(stemmed_vocab):
entropy_diffs[i] = np.mean(stemmed_entropies[sword]) - np.mean(unstemmed_entropies[sword])
# find top 50 maximum and minimum entropies
min_indices = np.argpartition(entropy_diffs, 50)[:50]
max_indices = np.argpartition(entropy_diffs, -50)[-50:]
with open('wordlists/{}-{}-{}.txt'.format(corpus, stemmer, topicct), 'w') as wf:
wf.write('Lowest entropy differences (stemmed is better)\n')
for i in min_indices:
wf.write('{}\t{}\t{}\n'.format(entropy_diffs[i], stemmed_vocab[i], ' '.join(stemmed_to_unstemmed[stemmed_vocab[i]])))
wf.write('Highest entropy differences (unstemmed is better)\n')
for i in max_indices:
wf.write('{}\t{}\t{}\n'.format(entropy_diffs[i], stemmed_vocab[i], ' '.join(stemmed_to_unstemmed[stemmed_vocab[i]])))
Example 15: computeRanks
def computeRanks(composedSpace, observedSpace):
"""Ranks all the representations in the composed space with respect to
    the representations in the observed space. Cut-off value: 1000.
"""
ranks = {}
rankList = []
composedWords = set(composedSpace.get_id2row())
observedWords = observedSpace.get_id2row()
neighbours = 1000
for w_idx, word in enumerate(composedWords):
vector = composedSpace.get_row(word)
Y = 1 - cdist(vector.mat, observedSpace.get_cooccurrence_matrix().mat, 'cosine')
nearest = Y.argmax()
nearest_k_indices = np.argpartition(Y, tuple([-p for p in range(neighbours)]), axis=None)[-neighbours:]
# pp([(observedWords[idx], Y[0][idx]) for idx in reversed(nearest_k_indices)])
words = [observedWords[idx] for idx in reversed(nearest_k_indices)]
wordRanks = {word:index+1 for index,word in enumerate(words)}
# print(wordRanks)
if (word in wordRanks):
r = wordRanks[word]
ranks[word] = r
rankList.append(r)
else:
ranks[word] = 1000
rankList.append(1000)
if ((w_idx > 0) and (w_idx % 100 == 0)):
print(w_idx)
return rankList, ranks