当前位置: 首页>>代码示例>>Python>>正文


Python Utility.cosine方法代码示例

本文整理汇总了Python中utility.Utility.cosine方法的典型用法代码示例。如果您正苦于以下问题:Python Utility.cosine方法的具体用法?Python Utility.cosine怎么用?Python Utility.cosine使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在utility.Utility的用法示例。


在下文中一共展示了Utility.cosine方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# 需要导入模块: from utility import Utility [as 别名]
# 或者: from utility.Utility import cosine [as 别名]
def main(originalFile, w2vFile, w2vDimension, topicModelFile, topicModelDimension, infoInstance, tfidfInstance, hasUrlInstance, ansProInstance):
    """Build one feature vector per comment found under ``originalFile``.

    ``originalFile`` is scanned for sub-directories. Inside each
    sub-directory, the file whose name equals the directory name is read as
    the question text; every other regular file is read as a comment.

    Parameters:
        originalFile: root directory to scan.
        w2vFile, w2vDimension: forwarded to ``W2V(...)``.
        topicModelFile, topicModelDimension: forwarded to ``TopicModel(...)``.
        infoInstance: provides ``cidToCuserid``, ``cidToQuserid`` and
            ``qidToCategory``.
        tfidfInstance: provides ``getTfidfScore(cid)``.
        hasUrlInstance: provides ``isExistUrl(cid)``.
        ansProInstance: provides ``getCategoryPro(category)``; its result is
            appended element by element to the feature vector.

    Returns:
        dict mapping each comment file name to a list laid out as
        ``[bow score, w2v score, topic-model score, same-user flag,
        <category probabilities...>, tfidf score, url value]``, where the
        three scores are whatever ``utility.cosine`` returns.
    """
    # Placeholder score stored when the question or the comment text is empty.
    emptyScore = 0.000000000001

    bowDict = {}
    w2vDict = {}
    tmDict = {}

    cuserComQuser = {}  # cid -> 1.0 if comment author == question author, else 0.0
    ansProDict = {}     # cid -> value of ansProInstance.getCategoryPro(category)
    tfidfDict = {}      # cid -> tfidf score
    urlDict = {}        # cid -> value of hasUrlInstance.isExistUrl(cid)

    resultDict = {}

    utility = Utility()
    w2v = W2V(w2vFile, w2vDimension)
    tm = TopicModel(topicModelFile, topicModelDimension)

    directories = [f for f in listdir(originalFile) if isdir(join(originalFile, f))]
    for directory in directories:
        # join() keeps this consistent with the isdir() check above and works
        # whether or not originalFile ends with a path separator.
        path = join(originalFile, directory)
        fileList = [f for f in listdir(path) if isfile(join(path, f))]

        # Question file: named exactly like its directory.
        with open(join(path, directory), "r") as fin:
            s1 = fin.read()
        vec1 = w2v.sentenceVector(s1)
        t1 = tm.getProbability(directory)

        # Comment files: everything else in the directory.
        for each in fileList:
            if each == directory:
                continue

            qid = directory
            cid = each
            cuserid = infoInstance.cidToCuserid(cid)
            quserid = infoInstance.cidToQuserid(cid)
            qcategory = infoInstance.qidToCategory(qid)

            cuserComQuser[cid] = 1.0 if cuserid == quserid else 0.0

            # NOTE (from the original author): the per-category answer
            # probabilities must first be recorded from the train set
            # (infoInstance.getCategoryAnsPro(qcategory), appended to
            # "categoryAnsProTrain.txt"); after that, the lookup below can be
            # used for the train, dev and test sets.
            ansProDict[cid] = ansProInstance.getCategoryPro(qcategory)
            tfidfDict[cid] = tfidfInstance.getTfidfScore(cid)
            urlDict[cid] = hasUrlInstance.isExistUrl(cid)

            with open(join(path, each), "r") as fin:
                s2 = fin.read()

            # Some questions & comments are empty after preprocessing.
            if not s1 or not s2:
                bowDict[each] = emptyScore
                w2vDict[each] = emptyScore
                tmDict[each] = emptyScore
                continue

            v1, v2 = BOW(s1, s2).getVector()
            bowDict[each] = utility.cosine(v1, v2)

            vec2 = w2v.sentenceVector(s2)
            w2vDict[each] = utility.cosine(vec1, vec2)

            t2 = tm.getProbability(each)
            tmDict[each] = utility.cosine(t1, t2)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("bowDict, w2vDict, tmDict done!")
    for key in bowDict:
        aList = [bowDict[key], w2vDict[key], tmDict[key], cuserComQuser[key]]
        aList.extend(ansProDict[key])
        aList.append(tfidfDict[key])
        aList.append(urlDict[key])
        resultDict[key] = aList
    print("resultDict done!")
    return resultDict
开发者ID:yaolili,项目名称:QAClassification,代码行数:101,代码来源:getFeatureVector.py

示例2: main

# 需要导入模块: from utility import Utility [as 别名]
# 或者: from utility.Utility import cosine [as 别名]
def main(originalFile, w2vFile, w2vDimension, topicModelFile, topicModelDimension, infoInstance, tfidfInstance, hasUrlInstance, ansProInstance, ynInstance):
    """Build feature vectors for the comments listed by ``ynInstance``.

    ``originalFile`` is scanned for sub-directories. Inside each
    sub-directory, the file whose name equals the directory name is read as
    the question text; every other regular file is a candidate comment.
    Only comments whose file name appears in ``ynInstance.getCidList()``
    are processed.

    Parameters:
        originalFile: root directory to scan.
        w2vFile, w2vDimension: forwarded to ``W2V(...)``.
        topicModelFile, topicModelDimension: forwarded to ``TopicModel(...)``.
        infoInstance: provides ``cidToCuserid``, ``cidToQuserid`` and
            ``qidToCategory``.
        tfidfInstance: provides ``getTfidfScore(cid)``.
        hasUrlInstance: provides ``isExistUrl(cid)``.
        ansProInstance: provides ``getCategoryPro(category)``; its result is
            appended element by element to the feature vector.
        ynInstance: provides ``getCidList()``, the comment ids to keep.

    Returns:
        dict mapping each kept comment file name to a list laid out as
        ``[bow score, w2v score, topic-model score, same-user flag,
        <category probabilities...>, tfidf score, url value]``, where the
        three scores are whatever ``utility.cosine`` returns.
    """
    # Placeholder score stored when the question or the comment text is empty.
    emptyScore = 0.000000000001

    # Only membership is needed, so a set replaces the dict of dummy values.
    wantedCids = set(ynInstance.getCidList())

    bowDict = {}
    w2vDict = {}
    tmDict = {}

    cuserComQuser = {}  # cid -> 1.0 if comment author == question author, else 0.0
    ansProDict = {}     # cid -> value of ansProInstance.getCategoryPro(category)
    tfidfDict = {}      # cid -> tfidf score
    urlDict = {}        # cid -> value of hasUrlInstance.isExistUrl(cid)

    resultDict = {}

    utility = Utility()
    w2v = W2V(w2vFile, w2vDimension)
    tm = TopicModel(topicModelFile, topicModelDimension)

    directories = [f for f in listdir(originalFile) if isdir(join(originalFile, f))]
    for directory in directories:
        # join() keeps this consistent with the isdir() check above and works
        # whether or not originalFile ends with a path separator.
        path = join(originalFile, directory)
        fileList = [f for f in listdir(path) if isfile(join(path, f))]

        # Question file: named exactly like its directory.
        with open(join(path, directory), "r") as fin:
            s1 = fin.read()
        vec1 = w2v.sentenceVector(s1)
        t1 = tm.getProbability(directory)

        # Comment files: everything else in the directory.
        for each in fileList:
            if each == directory:
                continue
            # Skip (rather than break on) comments that are not requested:
            # listdir() order is arbitrary, so stopping at the first
            # unlisted file could drop listed comments that happen to be
            # enumerated after it.
            if each not in wantedCids:
                continue

            qid = directory
            cid = each
            cuserid = infoInstance.cidToCuserid(cid)
            quserid = infoInstance.cidToQuserid(cid)
            qcategory = infoInstance.qidToCategory(qid)

            cuserComQuser[cid] = 1.0 if cuserid == quserid else 0.0

            ansProDict[cid] = ansProInstance.getCategoryPro(qcategory)
            tfidfDict[cid] = tfidfInstance.getTfidfScore(cid)
            urlDict[cid] = hasUrlInstance.isExistUrl(cid)

            with open(join(path, each), "r") as fin:
                s2 = fin.read()

            # Some questions & comments are empty after preprocessing.
            if not s1 or not s2:
                bowDict[each] = emptyScore
                w2vDict[each] = emptyScore
                tmDict[each] = emptyScore
                continue

            v1, v2 = BOW(s1, s2).getVector()
            bowDict[each] = utility.cosine(v1, v2)

            vec2 = w2v.sentenceVector(s2)
            w2vDict[each] = utility.cosine(vec1, vec2)

            t2 = tm.getProbability(each)
            tmDict[each] = utility.cosine(t1, t2)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("bowDict, w2vDict, tmDict done!")

    for key in bowDict:
        aList = [bowDict[key], w2vDict[key], tmDict[key], cuserComQuser[key]]
        aList.extend(ansProDict[key])
        aList.append(tfidfDict[key])
        aList.append(urlDict[key])
        resultDict[key] = aList
    print("resultDict done!")
    return resultDict
开发者ID:yaolili,项目名称:QAClassification,代码行数:99,代码来源:getYNCideFeatureVector.py


注:本文中的utility.Utility.cosine方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。