本文整理汇总了Python中index.Index.read_index方法的典型用法代码示例。如果您正苦于以下问题:Python Index.read_index方法的具体用法?Python Index.read_index怎么用?Python Index.read_index使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类index.Index的用法示例。
在下文中一共展示了Index.read_index方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: build_parallel_corpus
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import read_index [as 别名]
def build_parallel_corpus():
    """Build aligned question/answer term-count matrices and save them to disk.

    Fetches up to 5000 question/answer pairs from the ``quora`` database,
    vectorizes both sides with the CountVectorizer stored in the on-disk
    index, drops documents (rows) and terms (columns) that are empty on
    either side, and writes the aligned sparse matrices to ``q.mtx`` and
    ``a.mtx`` in Matrix Market format.

    Side effects: reads from PostgreSQL, writes ``a.mtx`` and ``q.mtx`` in
    the current working directory.
    """
    con = psycopg2.connect(database='quora', user='k')
    try:
        cur = con.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        psycopg2.extensions.register_type(psycopg2.extensions.UNICODE, cur)
        # we actually only need to cPickle the CountVectorizer object
        # so we can load the stop words and other preprocessing opt.
        index_directory = '/home/kyrre/michaeljackson'
        idx = Index.read_index(index_directory)
        query = """SELECT DISTINCT(Question.qid), concat(main, ' ', info) question , A.contents answers
FROM Question
JOIN (SELECT string_agg(content, ' ') as contents, qid FROM Answer GROUP BY qid) A
ON Question.qid = A.qid LIMIT 5000;
"""
        # the cursor is consumed by each transform, so re-execute the query
        # before vectorizing the second column
        cur.execute(query)
        a = idx.count_vect.transform(SQL_generator(cur, 'answers'))
        cur.execute(query)
        q = idx.count_vect.transform(SQL_generator(cur, 'question'))
    finally:
        # fix: the original never closed the connection
        con.close()

    # flatten a (1, n) / (n, 1) matrix result into a 1-d array
    od = lambda x: np.squeeze(np.asarray(x))

    # keep only documents (rows) that are non-empty on BOTH sides so the
    # two matrices stay row-aligned
    asum = od(a.sum(axis=1))
    qsum = od(q.sum(axis=1))
    nnz_indices = np.intersect1d(od(np.argwhere(asum != 0)),
                                 od(np.argwhere(qsum != 0)))
    a = a[nnz_indices, :]
    q = q[nnz_indices, :]

    # keep only terms (columns) that occur on BOTH sides
    asum = od(a.sum(axis=0))
    qsum = od(q.sum(axis=0))
    nnz_cols_indices = np.intersect1d(od(np.argwhere(asum != 0)),
                                      od(np.argwhere(qsum != 0)))
    a = a[:, nnz_cols_indices]
    q = q[:, nnz_cols_indices]

    assert a.shape == q.shape
    assert_sorted_indices(a)
    assert_sorted_indices(q)

    # fix: pass the target filename so mmwrite opens and closes the file
    # itself; the original passed open(...) handles that were never closed
    scipy.io.mmwrite('a.mtx', a)
    scipy.io.mmwrite('q.mtx', q)
示例2: build_parallel_corpus
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import read_index [as 别名]
def build_parallel_corpus():
    """Build the parallel question/answer corpus as featurized sparse entries.

    Fetches every question/answer pair from the ``quora`` database,
    featurizes both sides with the CountVectorizer stored in the on-disk
    index, and skips pairs where either side produces no features.

    Returns:
        Bunch with three attributes:
          - ``questions``: OrderedDict mapping qid -> [data, indices]
          - ``answers``:   OrderedDict mapping qid -> [data, indices]
          - ``count_vect``: the CountVectorizer used for featurization
    """
    con = psycopg2.connect(database='quora', user='k')
    try:
        cur = con.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        psycopg2.extensions.register_type(psycopg2.extensions.UNICODE, cur)
        # we actually only need to cPickle the CountVectorizer object
        # so we can load the stop words and other preprocessing opt.
        index_directory = '/home/kyrre/michaeljackson'
        idx = Index.read_index(index_directory)
        query = """SELECT DISTINCT(Question.qid), concat(main, ' ', info) question , A.contents answers from Question JOIN (SELECT
string_agg(content, ' ') as contents, qid FROM Answer GROUP BY qid) A ON
Question.qid = A.qid;
"""
        cur.execute(query)
        # parallel corpora: OrderedDict keeps question/answer entries in the
        # same (insertion) order on both sides
        questions = OrderedDict()
        answers = OrderedDict()
        for record in cur:
            qdata, qindices = idx.count_vect.featurize(record['question'])
            adata, aindices = idx.count_vect.featurize(record['answers'])
            # skip "empty" entries so both dicts stay pairwise aligned
            if adata.size == 0 or qdata.size == 0:
                continue
            questions[record['qid']] = [qdata, qindices]
            answers[record['qid']] = [adata, aindices]
    finally:
        # fix: the original never closed the connection
        con.close()
    return Bunch(questions=questions, answers=answers,
                 count_vect=idx.count_vect)