本文整理汇总了Python中document.Document.make_vectors方法的典型用法代码示例。如果您正苦于以下问题：Python Document.make_vectors方法的具体用法？Python Document.make_vectors怎么用？Python Document.make_vectors使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类document.Document的用法示例。
在下文中一共展示了Document.make_vectors方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parseTrainFile
# 需要导入模块: from document import Document [as 别名]
# 或者: from document.Document import make_vectors [as 别名]
def parseTrainFile(trainFile):
    """Parse a training file into queries and their candidate documents.

    The file is read line by line as ``key: value`` records. A ``query``
    record starts a new query; each following ``url`` record creates a new
    ``Document`` for that query, and the remaining record types
    (``title``, ``header``, ``body_hits``, ``body_length``, ``pagerank``,
    ``anchor_text`` / ``stanford_anchor_count``) are applied to the most
    recently created document.

    After parsing, the per-field average lengths over all documents are
    computed, the document-frequency dictionary is loaded with ``marshal``
    from the file ``DocFreqDict`` (relative to the current working
    directory), and every document gets ``set_averages`` and
    ``make_vectors`` called on it.

    Args:
        trainFile: Path of the training file to read.

    Returns:
        A tuple ``(queries, docs, query2docs)``: the list of query strings
        in file order, the flat list of all documents in file order, and a
        dict mapping each query string to the list of its documents.
        If the file contains no ``url`` records, the tuple is returned
        right after parsing (``docs`` is empty) and ``DocFreqDict`` is not
        read, because the averages would otherwise divide by zero.
    """
    # Function-scope import keeps this block self-contained; ``marshal`` is
    # only needed to read the DocFreqDict file below.
    import marshal

    queries = []
    docs = []
    query2docs = {}

    # ``with`` guarantees the file is closed even if a record fails to parse.
    with open(trainFile, 'r') as train_f:
        for line in train_f:
            # Split on the first ':' only, so values such as URLs that
            # contain ':' themselves stay intact.
            key, sep, value = line.partition(':')
            if not sep:
                # Blank line or line without a key/value separator.
                continue
            key = key.strip()
            value = value.strip()

            if key == 'query':
                query = value
                queries.append(query)
                query2docs[query] = []
            elif key == 'url':
                # A url record opens a new document for the current query.
                doc = Document(query)
                docs.append(doc)
                query2docs[query].append(doc)
                doc.set_url(value)
            elif key == 'title':
                doc.set_title(value)
            elif key == 'header':
                doc.add_header(value)
            elif key == 'body_hits':
                # Format: "<term> <pos> <pos> ...".
                temp = value.split(' ', 1)
                # Build a real list (not a lazy ``map`` object) so the
                # positions can be indexed or iterated more than once.
                positions = [int(pos) for pos in temp[1].strip().split()]
                doc.add_body_hits(temp[0].strip(), positions)
            elif key == 'body_length':
                doc.set_body_length(int(value))
            elif key == 'pagerank':
                doc.set_pagerank(int(value))
            elif key == 'anchor_text':
                # Remembered until the matching stanford_anchor_count record.
                anchor_text = value
            elif key == 'stanford_anchor_count':
                doc.add_anchor(anchor_text, int(value))

    doc_count = len(docs)
    if doc_count == 0:
        # Nothing to average or vectorize; avoid dividing by zero.
        return queries, docs, query2docs

    avlen_url = sum(doc.url_length for doc in docs) / doc_count
    avlen_title = sum(doc.title_length for doc in docs) / doc_count
    avlen_header = sum(doc.header_length for doc in docs) / doc_count
    avlen_body = sum(doc.body_length for doc in docs) / doc_count
    avlen_anchor = sum(doc.anchor_length for doc in docs) / doc_count

    # NOTE(review): marshal must only be used on trusted files; DocFreqDict
    # is presumably produced by this project's own tooling - confirm.
    with open('DocFreqDict', 'rb') as doc_freq_f:
        doc_freq_dict = marshal.load(doc_freq_f)

    for doc in docs:
        doc.set_averages(avlen_url, avlen_title, avlen_header, avlen_body, avlen_anchor)
        doc.make_vectors(doc_freq_dict)

    return queries, docs, query2docs