本文整理汇总了Python中index.Index.read_index方法的典型用法代码示例。如果您正苦于以下问题:Python Index.read_index方法的具体用法?Python Index.read_index怎么用?Python Index.read_index使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类index.Index的用法示例。
在下文中一共展示了Index.read_index方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: build_parallel_corpus
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import read_index [as 别名]
def build_parallel_corpus():
    """Build aligned question/answer term-count matrices and save them to disk.

    Fetches up to 5000 question/answer pairs from the ``quora`` database,
    vectorizes both sides with the CountVectorizer stored in the on-disk
    index, drops documents (rows) and terms (columns) that are empty on
    either side, and writes the aligned sparse matrices to ``q.mtx`` and
    ``a.mtx`` in Matrix Market format.

    Side effects: reads from PostgreSQL, writes ``a.mtx`` and ``q.mtx`` in
    the current working directory.
    """
    con = psycopg2.connect(database='quora', user='k')
    try:
        cur = con.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        psycopg2.extensions.register_type(psycopg2.extensions.UNICODE, cur)
        # we actually only need to cPickle the CountVectorizer object
        # so we can load the stop words and other preprocessing opt.
        index_directory = '/home/kyrre/michaeljackson'
        idx = Index.read_index(index_directory)
        query = """SELECT DISTINCT(Question.qid), concat(main, ' ', info) question , A.contents answers
FROM Question
JOIN (SELECT string_agg(content, ' ') as contents, qid FROM Answer GROUP BY qid) A
ON Question.qid = A.qid LIMIT 5000;
"""
        # the cursor is consumed by each transform, so re-execute the query
        # before vectorizing the second column
        cur.execute(query)
        a = idx.count_vect.transform(SQL_generator(cur, 'answers'))
        cur.execute(query)
        q = idx.count_vect.transform(SQL_generator(cur, 'question'))
    finally:
        # fix: the original never closed the connection
        con.close()

    # flatten a (1, n) / (n, 1) matrix result into a 1-d array
    od = lambda x: np.squeeze(np.asarray(x))

    # keep only documents (rows) that are non-empty on BOTH sides so the
    # two matrices stay row-aligned
    asum = od(a.sum(axis=1))
    qsum = od(q.sum(axis=1))
    nnz_indices = np.intersect1d(od(np.argwhere(asum != 0)),
                                 od(np.argwhere(qsum != 0)))
    a = a[nnz_indices, :]
    q = q[nnz_indices, :]

    # keep only terms (columns) that occur on BOTH sides
    asum = od(a.sum(axis=0))
    qsum = od(q.sum(axis=0))
    nnz_cols_indices = np.intersect1d(od(np.argwhere(asum != 0)),
                                      od(np.argwhere(qsum != 0)))
    a = a[:, nnz_cols_indices]
    q = q[:, nnz_cols_indices]

    assert a.shape == q.shape
    assert_sorted_indices(a)
    assert_sorted_indices(q)

    # fix: pass the target filename so mmwrite opens and closes the file
    # itself; the original passed open(...) handles that were never closed
    scipy.io.mmwrite('a.mtx', a)
    scipy.io.mmwrite('q.mtx', q)
示例2: build_parallel_corpus
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import read_index [as 别名]
def build_parallel_corpus():
    """Build the parallel question/answer corpus as featurized sparse entries.

    Fetches every question/answer pair from the ``quora`` database,
    featurizes both sides with the CountVectorizer stored in the on-disk
    index, and skips pairs where either side produces no features.

    Returns:
        Bunch with three attributes:
          - ``questions``: OrderedDict mapping qid -> [data, indices]
          - ``answers``:   OrderedDict mapping qid -> [data, indices]
          - ``count_vect``: the CountVectorizer used for featurization
    """
    con = psycopg2.connect(database='quora', user='k')
    try:
        cur = con.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        psycopg2.extensions.register_type(psycopg2.extensions.UNICODE, cur)
        # we actually only need to cPickle the CountVectorizer object
        # so we can load the stop words and other preprocessing opt.
        index_directory = '/home/kyrre/michaeljackson'
        idx = Index.read_index(index_directory)
        query = """SELECT DISTINCT(Question.qid), concat(main, ' ', info) question , A.contents answers from Question JOIN (SELECT
string_agg(content, ' ') as contents, qid FROM Answer GROUP BY qid) A ON
Question.qid = A.qid;
"""
        cur.execute(query)
        # parallel corpora: OrderedDict keeps question/answer entries in the
        # same (insertion) order on both sides
        questions = OrderedDict()
        answers = OrderedDict()
        for record in cur:
            qdata, qindices = idx.count_vect.featurize(record['question'])
            adata, aindices = idx.count_vect.featurize(record['answers'])
            # skip "empty" entries so both dicts stay pairwise aligned
            if adata.size == 0 or qdata.size == 0:
                continue
            questions[record['qid']] = [qdata, qindices]
            answers[record['qid']] = [adata, aindices]
    finally:
        # fix: the original never closed the connection
        con.close()
    return Bunch(questions=questions, answers=answers,
                 count_vect=idx.count_vect)