當前位置: 首頁>>代碼示例>>Python>>正文


Python Document._term_frequency方法代碼示例

本文整理匯總了Python中models.Document._term_frequency方法的典型用法代碼示例。如果您正苦於以下問題:Python Document._term_frequency方法的具體用法?Python Document._term_frequency怎麼用?Python Document._term_frequency使用的例子?那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在models.Document的用法示例。


在下文中一共展示了Document._term_frequency方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: retrive_documents

# 需要導入模塊: from models import Document [as 別名]
# 或者: from models.Document import _term_frequency [as 別名]
 def retrive_documents(self,query_id):
     """Build the query for ``query_id`` and run query expansion on it.

     As written, the method returns ``None`` immediately after calling
     ``self._expand_query(q)`` and discards that call's result; every
     statement after the bare ``return`` is unreachable.

     The unreachable remainder loads the collection and accumulates an
     Okapi-BM25-style score per document into ``score`` (keyed by
     ``doc.PMID``).  Nothing in the visible code returns or stores that
     dict.

     :param query_id: identifier handed to ``Query(...)``.
     :returns: ``None``.
     """
     # Scoring constants; only read by the unreachable code below.
     k1 = 1.2
     k3 = 8.00
     # presumably the average document length of the collection -- TODO confirm
     avg_dl = 122
     b = 1 # from 0.25 to 2.00 increase 0.25
     q = Query(query_id)
     #q.set_concepts(self.QueryConceptExtraction(q.text))
     self._expand_query(q)
     # NOTE(review): this bare return makes everything below dead code and
     # drops the expanded query -- looks like a debugging short-circuit,
     # confirm whether it is intentional.
     return
     print "Retrieving Documents for: ", q.text
     Collection._load()
     Collection._load_go()
     Collection._load_tags()
     Collection._load_indexes()      #Loads documents into _documents with PMID and Index
     score = dict()
     N = Collection._count
     # Per-term collection frequency, looked up once per query term.
     Nt = dict()
     for term in q.text:
         Nt[term] = Collection._get_frequency(term)
     # Incremented once per document; never read in the visible code.
     counter = 0
     for doc in Collection._documents:
         summation = 0;
         # Multiply by 1.00 so the length ratio below is a float division.
         dl = doc.length * 1.00
         for t in q.text:
             tfn = doc.get_frequency(t)
             QQ = ' '.join(q.text)
             qtf = Document._term_frequency(QQ, t)
             # Length-normalisation factor of the document-side component.
             K = k1*((1-b)+b*(dl/avg_dl))
             # Base-2 IDF-style weight of the term.
             w = log((N-Nt[t]+0.5)/(Nt[t]+0.5),2)
             if w<0:
                 #this makes the result a negative number
                 # if we break the result will be bigger than or equal to zero
                 # NOTE(review): break also skips every later query term
                 # for this document, not just the negative one.
                 break
             p1 = (((k1+1)*tfn)/(K+tfn))     # document-side term-frequency part
             p2 = ((k3+1)*qtf/(k3+qtf))      # query-side term-frequency part
             p3 = w
             summation += p1*p2*p3
         score[doc.PMID] = summation
         counter += 1
開發者ID:armanfatahi,項目名稱:ContextSensitiveIR,代碼行數:41,代碼來源:IR.py

示例2: _expand_query

# 需要導入模塊: from models import Document [as 別名]
# 或者: from models.Document import _term_frequency [as 別名]
 def _expand_query(self,q):
     """Select expansion terms for ``q`` via pseudo-relevance feedback.

     Step 1 scores every document in ``Collection._documents`` against the
     terms of ``q.text`` with an Okapi-BM25-style formula, plots the
     positive scores through ``Display._plot`` and ranks the best ``M``
     documents (``M`` comes from ``Parameter.GetDocNumberForLocalContext``).
     Step 2 weights each feedback term by its normalised query frequency
     plus ``Beta`` times its normalised Bo1-style information value and
     keeps the terms whose weight exceeds 0.25.

     NOTE(review): the feedback text is built from an empty string and the
     ranked ``TopDocs`` are never read back, so step 2 only sees whatever
     ``tp.Tokenize('')`` yields.  The code that turns the top documents
     into text appears to be missing -- confirm against the project before
     relying on the expansion.

     :param q: query object exposing ``text`` (iterable of terms) and
         ``get_frequency(term)``.
     :returns: list of terms whose weight is greater than 0.25.
     """
     #--STEP 1----------Extract TOP DOCUMENTS ----------------------------
     tp = TextProcessor()
     param = Parameter()
     k1 = 1.2
     k3 = 8.00
     avg_dl = 122                    # presumably the average document length -- TODO confirm
     b = 1                           # from 0.25 to 2.00 increase 0.25
     Collection._load_indexes()      # Loads indexes into _documents
     documents = Collection._documents
     N = len(documents)

     # The IDF weight and the query-side factor depend only on the term, so
     # compute them once instead of once per document.  They are evaluated
     # only when there is at least one document, because the per-document
     # loop is the only place they were ever evaluated before.
     term_factors = []
     if N > 0:
         for t in q.text:
             Nt = Collection._get_frequency(t)
             w = log((N - Nt + 0.5) / (Nt + 0.5), 2)
             if w < 0:
                 # A negative weight would pull the score below zero.
                 # NOTE(review): stopping here also drops every later query
                 # term (existing behaviour, kept as is); skipping only this
                 # term may be what was intended.
                 break
             qtf = q.get_frequency(t)
             term_factors.append((t, (k3 + 1) * qtf / (k3 + qtf), w))

     score = dict()
     for D in documents:
         # Multiply by 1.00 so the length ratio is a float division.
         dl = D.length * 1.00
         K = k1 * ((1 - b) + b * (dl / avg_dl))
         summation = 0
         for t, p2, w in term_factors:
             tfn = D.get_frequency(t)
             p1 = ((k1 + 1) * tfn) / (K + tfn)
             summation += p1 * p2 * w
         score[D.PMID] = summation

     M = param.GetDocNumberForLocalContext()
     positive = [(pmid, s) for pmid, s in score.items() if s > 0]
     new_score = dict(positive)
     # Best M documents by descending score.  sorted() is stable, so ties
     # keep the dict's iteration order; unused slots are padded with ''.
     ranked = sorted(positive, key=lambda entry: entry[1], reverse=True)
     TopDocs = [pmid for pmid, _ in ranked[:max(M, 0)]]
     TopDocs.extend([''] * (M - len(TopDocs)))
     Display._plot(new_score, q)
     # NOTE(review): TopDocs is not consumed; the feedback text starts empty.
     TopDocsTexts = tp.Tokenize('')
     TopDocsTexts = TextProcessor._remove_stop_words(TopDocsTexts)
     #---STEP 2---------Calculate weight of each term which is a member of new query----------------------------
     Beta = 0.4
     feedback_text = ' '.join(TopDocsTexts)
     tfq_of = dict()
     info_of = dict()
     MaxTFQ = 0.001                  # floor keeps the normalisation below away from zero
     MaxInfo = 0
     for term in TopDocsTexts:
         if term in info_of:
             continue                # repeated term: values already computed
         tfq = q.get_frequency(term)
         if tfq > MaxTFQ:
             MaxTFQ = tfq
         # The same term-frequency value serves as both lambda and the
         # frequency multiplier of the Bo1-style formula.
         # assumes it is > 0 for terms taken from feedback_text, otherwise
         # the second log is undefined -- TODO confirm
         Lambda = Document._term_frequency(feedback_text, term)
         InfoBO1 = (-log(1.00 / (1.00 + Lambda), 2)
                    - Lambda * log(Lambda / (1.00 + Lambda), 2))
         if InfoBO1 > MaxInfo:
             MaxInfo = InfoBO1
         tfq_of[term] = tfq
         info_of[term] = InfoBO1

     weight = dict()
     for term in TopDocsTexts:
         if MaxInfo > 0:
             weight[term] = (tfq_of[term] + 0.00) / MaxTFQ + Beta * (info_of[term] / MaxInfo)
         else:
             weight[term] = 0
     return [term for term in weight if weight[term] > 0.25]
開發者ID:armanfatahi,項目名稱:ContextSensitiveIR,代碼行數:92,代碼來源:IR.py


注:本文中的models.Document._term_frequency方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。