本文整理汇总了 Python 中 Document.Document.add_body_hits 方法的典型用法代码示例。如果您正苦于以下问题:Python Document.add_body_hits 方法的具体用法?Python Document.add_body_hits 怎么用?Python Document.add_body_hits 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Document.Document 的用法示例。
在下文中一共展示了Document.add_body_hits方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse
# 需要导入模块: from Document import Document [as 别名]
# 或者: from Document.Document import add_body_hits [as 别名]
def parse(self):
    """Parse the query/document file at ``self.queryDocPath``.

    The file is read line by line. Each line is stripped and dispatched on
    its prefix:

    * ``query``       -- starts a new ``Query`` (constructed with ``self.idf``).
    * ``url``         -- starts a new ``Document`` attached to the current
                         query; the whole line is handed to ``set_url``.
    * ``title``       -- title text of the current document.
    * ``body_hits``   -- a term followed by its whitespace-separated hits.
    * ``body_length`` -- body length of the current document.
    * ``anchor_text`` -- anchor text; the *next* line carries its count.

    Lines with any other prefix are ignored. Parsing stops at the first
    line that is empty after stripping (end of file or a blank line).

    ``calculate_vectors()`` is called on each document once all of its
    lines have been consumed, i.e. when the next ``url`` line starts or
    when parsing ends.

    Side effects:
        Resets and recomputes ``self.docs``, ``self.titles``,
        ``self.anchors``, ``self.avg_anchor_length``,
        ``self.avg_title_length``, ``self.avg_body_length`` and
        ``self.avg_anchors_per_doc``. An average whose denominator is zero
        (no documents / no titles) is set to ``0.0``.

    Returns:
        list: the ``Query`` objects in the order they appear in the file.
    """

    def _text_after_label(raw):
        # Split on the first colon only so free text that itself contains
        # ':' (e.g. a title like "foo: bar") is not truncated.
        return raw.split(':', 1)[1]

    queries = []
    c_query = None
    c_doc = None
    self.avg_anchor_length = 0
    self.avg_title_length = 0
    self.avg_body_length = 0
    # Reset here like every other accumulator; otherwise the first ``+=``
    # below depends on state set outside this method.
    self.avg_anchors_per_doc = 0
    self.docs = 0
    self.titles = 0
    self.anchors = 0

    # ``with`` guarantees the file is closed even if a malformed line raises.
    with open(self.queryDocPath, 'r') as f:
        while True:
            line = f.readline().strip()
            if not line:
                # EOF or a blank line terminates parsing.
                break
            elif line.startswith('query'):
                c_query = Query(self.idf)
                queries.append(c_query)
                c_query.set_query(_text_after_label(line))
            elif line.startswith('url'):
                self.docs += 1
                # The previous document is complete once a new one begins.
                if c_doc is not None:
                    c_doc.calculate_vectors()
                c_doc = Document(c_query)
                c_query.add_doc(c_doc)
                c_doc.set_url(line)
            elif line.startswith('title'):
                c_doc.set_title(_text_after_label(line))
                self.avg_title_length += len(c_doc.title_terms)
                self.titles += 1
            elif line.startswith('body_hits'):
                fields = _text_after_label(line).strip().split()
                term = fields[0]
                hits = fields[1:]
                c_doc.add_body_hits(term, hits)
            elif line.startswith('body_length'):
                c_doc.set_body_length(line.split(':')[1])
                self.avg_body_length += c_doc.body_length
            elif line.startswith('anchor_text'):
                text = _text_after_label(line).strip()
                # The anchor count lives on the line immediately following.
                count_line = f.readline()
                count = count_line.split(':')[1].strip()
                c_doc.add_anchor_text(text, count)
                self.avg_anchor_length += int(count)
                self.avg_anchors_per_doc += int(count)

    # The last document has no following ``url`` line to trigger this.
    if c_doc is not None:
        c_doc.calculate_vectors()

    # We calculate avg anchor length as follows:
    # consider all words in anchor text for a doc as one BIG document, so
    # count up all occurrences of anchor words and divide them by the
    # number of docs.
    # Guard the denominators so an empty file, or a file without any
    # title lines, yields 0.0 instead of ZeroDivisionError.
    if self.docs:
        self.avg_anchor_length = self.avg_anchor_length * 1.0 / self.docs
        self.avg_body_length = self.avg_body_length * 1.0 / self.docs
        self.avg_anchors_per_doc = self.avg_anchors_per_doc * 1.0 / self.docs
    else:
        self.avg_anchor_length = 0.0
        self.avg_body_length = 0.0
        self.avg_anchors_per_doc = 0.0
    if self.titles:
        self.avg_title_length = self.avg_title_length * 1.0 / self.titles
    else:
        self.avg_title_length = 0.0
    return queries