本文整理匯總了Python中models.Document._term_frequency方法的典型用法代碼示例。如果您正苦於以下問題:Python Document._term_frequency方法的具體用法?Python Document._term_frequency怎麽用?Python Document._term_frequency使用的例子?那麽,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類models.Document的用法示例。
在下文中一共展示了Document._term_frequency方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: retrive_documents
# 需要導入模塊: from models import Document [as 別名]
# 或者: from models.Document import _term_frequency [as 別名]
def retrive_documents(self,query_id):
    """Build the query for ``query_id`` and run query expansion on it.

    As written, the method stops right after ``self._expand_query(q)`` and
    returns ``None``.  The BM25-style ranking section that follows the early
    ``return`` is unreachable; it is kept (in a form that parses on both
    Python 2 and Python 3) so that it can be re-enabled by deleting the
    ``return``.

    Args:
        query_id: identifier handed to ``Query`` to build the query object.

    Returns:
        None.  The value returned by ``_expand_query`` is discarded, and the
        ``score`` mapping built by the disabled section is never returned.
    """
    k1 = 1.2
    k3 = 8.00
    avg_dl = 122
    b = 1 # from 0.25 to 2.00 increase 0.25
    q = Query(query_id)
    #q.set_concepts(self.QueryConceptExtraction(q.text))
    self._expand_query(q)
    # NOTE(review): this unconditional return disables everything below.
    # It looks like a debugging leftover -- confirm before removing it.
    return

    # ---- disabled: BM25-style scoring of every document in the collection ----
    # Single pre-formatted argument so the output is the same on Python 2 and 3
    # (the former print statement emitted the label, a space, then q.text).
    print("Retrieving Documents for:  %s" % (q.text,))
    Collection._load()
    Collection._load_go()
    Collection._load_tags()
    Collection._load_indexes() #Loads documents into _documents with PMID and Index
    documents = Collection._documents
    N = Collection._count

    # Everything that depends only on the query term is computed once here
    # instead of once per (document, term) pair.
    term_stats = []
    if len(documents) > 0:
        query_text = ' '.join(q.text)
        for t in q.text:
            Nt = Collection._get_frequency(t)
            w = log((N-Nt+0.5)/(Nt+0.5),2)
            if w<0:
                # A negative weight would make the score negative.  Scoring
                # stops at the first such term (later terms are not scored),
                # so every document score stays >= 0.
                break
            qtf = Document._term_frequency(query_text, t)
            term_stats.append((t, (k3+1)*qtf/(k3+qtf), w))

    score = dict()
    for doc in documents:
        summation = 0
        dl = doc.length * 1.00
        K = k1*((1-b)+b*(dl/avg_dl))  # length normalisation, per document
        for t, p2, w in term_stats:
            tfn = doc.get_frequency(t)
            denom = K+tfn
            # denom is 0 for a zero-length document that lacks the term
            # (K == 0 when b == 1); such a term contributes nothing.
            p1 = ((k1+1)*tfn)/denom if denom else 0.0
            summation += p1*p2*w
        score[doc.PMID] = summation
示例2: _expand_query
# 需要導入模塊: from models import Document [as 別名]
# 或者: from models.Document import _term_frequency [as 別名]
def _expand_query(self,q):
    """Pick expansion terms for query ``q`` (pseudo-relevance feedback).

    Step 1 scores every document in ``Collection._documents`` against the
    terms of ``q.text`` with a BM25-style formula, plots the strictly
    positive scores through ``Display._plot`` and ranks the best ``M``
    documents, where ``M`` comes from
    ``Parameter().GetDocNumberForLocalContext()``.

    Step 2 gives every candidate term the weight
    ``tfq / MaxTFQ + Beta * InfoBO1 / MaxInfo`` and returns the terms whose
    weight is above 0.25.

    Args:
        q: query object.  This method iterates ``q.text`` term by term and
            calls ``q.get_frequency(term)``; it never assigns to ``q``.

    Returns:
        list: the candidate terms whose weight exceeds 0.25.
    """
    #--STEP 1----------Extract TOP DOCUMENTS ----------------------------
    tp = TextProcessor()
    param = Parameter()
    k1 = 1.2
    k3 = 8.00
    avg_dl = 122
    b = 1 # from 0.25 to 2.00 increase 0.25
    Collection._load_indexes() # Loads indexes into _documents
    documents = Collection._documents
    N = len(documents)

    # Query-side factors do not depend on the document, so they are computed
    # once per term rather than once per (document, term) pair.
    term_stats = []
    if N:
        for t in q.text:
            Nt = Collection._get_frequency(t)
            w = log((N-Nt+0.5)/(Nt+0.5),2)
            if w<0:
                # A negative weight would make the score negative.  Scoring
                # stops at the first such term (later terms are not scored),
                # so every document score stays >= 0.
                break
            qtf = q.get_frequency(t)
            term_stats.append((t, (k3+1)*qtf/(k3+qtf), w))

    score = dict()
    for D in documents:
        summation = 0
        dl = D.length * 1.00
        K = k1*((1-b)+b*(dl/avg_dl))  # length normalisation, per document
        for t, p2, w in term_stats:
            tfn = D.get_frequency(t)
            denom = K+tfn
            # denom is 0 for a zero-length document that lacks the term
            # (K == 0 when b == 1); such a term contributes nothing.
            p1 = ((k1+1)*tfn)/denom if denom else 0.0
            summation += p1*p2*w
        score[D.PMID] = summation

    M = param.GetDocNumberForLocalContext()
    new_score = dict((pmid, s) for pmid, s in score.items() if s > 0)
    # Best M documents by descending score.  sorted() is stable, so documents
    # with equal scores keep their iteration order, as the former manual
    # insertion did.
    ranked = sorted(new_score.items(), key=lambda item: item[1], reverse=True)
    TopDocs = [pmid for pmid, _ in ranked[:max(M, 0)]]
    Display._plot(new_score, q)

    # NOTE(review): TopDocs is not consumed below -- the feedback text starts
    # out as the empty string, so the candidate terms are whatever
    # tp.Tokenize('') yields.  The step that gathers the text of the top
    # documents appears to be missing; confirm against the project source.
    TopDocsTexts = ''
    TopDocsTexts = tp.Tokenize(TopDocsTexts)
    TopDocsTexts = TextProcessor._remove_stop_words(TopDocsTexts)

    #---STEP 2---------Calculate weight of each term which is a member of new query----------------------------
    Beta = 0.4
    feedback_text = ' '.join(TopDocsTexts)  # joined once, not once per token
    MaxTFQ = 0.001  # floor that keeps the normalisation below away from /0
    MaxInfo = 0
    terms = []         # distinct candidate terms, in first-seen order
    query_tf = dict()  # term -> q.get_frequency(term)
    info = dict()      # term -> InfoBO1
    for term in TopDocsTexts:
        if term in info:
            continue  # repeated token: value already computed
        tfq = q.get_frequency(term)
        if tfq > MaxTFQ:
            MaxTFQ = tfq
        # NOTE(review): the source obtained Lambda and Freq_t_k from the very
        # same call.  In the Bo1 model Lambda is normally a collection-level
        # statistic -- confirm whether using the feedback text is intended.
        Lambda = Document._term_frequency(feedback_text, term)
        Freq_t_k = Lambda
        log1 = log(1.00/(1.00+Lambda),2)
        log2 = log(Lambda/(1.00+Lambda),2)
        InfoBO1 = -log1 - Freq_t_k * log2
        if InfoBO1 > MaxInfo:
            MaxInfo = InfoBO1
        terms.append(term)
        query_tf[term] = tfq
        info[term] = InfoBO1

    weight = dict()
    for term in terms:
        if MaxInfo >0 :
            tfqN = (query_tf[term] +0.00) /MaxTFQ
            weight[term] = tfqN + Beta*(info[term]/MaxInfo)
        else:
            weight[term] = 0

    QPrime = [term for term in terms if weight[term] > 0.25]
    return QPrime