本文整理汇总了Python中utility.Utility.cosine方法的典型用法代码示例。如果您正苦于以下问题:Python Utility.cosine方法的具体用法?Python Utility.cosine怎么用?Python Utility.cosine使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类utility.Utility的用法示例。
在下文中一共展示了Utility.cosine方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from utility import Utility [as 别名]
# 或者: from utility.Utility import cosine [as 别名]
def main(originalFile, w2vFile, w2vDimension, topicModelFile, topicModelDimension, infoInstance, tfidfInstance, hasUrlInstance, ansProInstance):
    """Build one feature list per comment file found under ``originalFile``.

    Every sub-directory of ``originalFile`` is processed as one question:
    the file inside it whose name equals the directory name is read as the
    question text, and every other regular file is read as a comment (its
    file name is used as the comment id).

    For each comment the returned list contains, in this order:

    1. ``Utility.cosine`` of the two vectors returned by ``BOW(s1, s2)``
    2. ``Utility.cosine`` of the W2V sentence vectors of question and comment
    3. ``Utility.cosine`` of the ``TopicModel.getProbability`` results
    4. ``1.0`` if the comment user id equals the question user id, else ``0.0``
    5. every element of ``ansProInstance.getCategoryPro(category)``
    6. ``tfidfInstance.getTfidfScore(cid)``
    7. ``hasUrlInstance.isExistUrl(cid)``

    When the question text or the comment text is empty, items 1-3 are set
    to the placeholder ``0.000000000001`` and no cosine is computed.

    Args:
        originalFile: Directory that contains one sub-directory per question.
            It may be given with or without a trailing path separator.
        w2vFile, w2vDimension: Forwarded unchanged to ``W2V(...)``.
        topicModelFile, topicModelDimension: Forwarded unchanged to
            ``TopicModel(...)``.
        infoInstance: Object providing ``cidToCuserid``, ``cidToQuserid``
            and ``qidToCategory``.
        tfidfInstance: Object providing ``getTfidfScore``.
        hasUrlInstance: Object providing ``isExistUrl``.
        ansProInstance: Object providing ``getCategoryPro``.

    Returns:
        dict mapping each comment file name to its feature list.
    """
    # Placeholder similarity stored when either text is empty.
    emptyScore = 0.000000000001

    bowDict = {}
    w2vDict = {}
    tmDict = {}
    cuserComQuser = {}  # cid -> 1.0 / 0.0, comment user compared with question user
    ansProDict = {}     # cid -> sequence returned by getCategoryPro for the category
    tfidfDict = {}      # cid -> tfidf score
    urlDict = {}        # cid -> result of isExistUrl
    resultDict = {}

    utility = Utility()
    w2v = W2V(w2vFile, w2vDimension)
    tm = TopicModel(topicModelFile, topicModelDimension)

    questionDirs = [f for f in listdir(originalFile) if isdir(join(originalFile, f))]
    for directory in questionDirs:
        # Use join() like the directory filter above does: plain string
        # concatenation only works if originalFile ends with a separator.
        path = join(originalFile, directory)
        fileList = [f for f in listdir(path) if isfile(join(path, f))]

        # Question file: named exactly like its directory.
        with open(join(path, directory), "r") as fin:
            s1 = fin.read()
        vec1 = w2v.sentenceVector(s1)
        t1 = tm.getProbability(directory)

        # Comment files: everything else in the directory.
        for each in fileList:
            if each == directory:
                continue
            qid = directory
            cid = each

            cuserid = infoInstance.cidToCuserid(cid)
            quserid = infoInstance.cidToQuserid(cid)
            qcategory = infoInstance.qidToCategory(qid)
            cuserComQuser[cid] = 1.0 if cuserid == quserid else 0.0

            # NOTE: the per-category answer probabilities have to be recorded
            # from the training set first (the original author did this with
            # infoInstance.getCategoryAnsPro(qcategory), appending the values
            # to "categoryAnsProTrain.txt"). After that, the lookup below can
            # be used for the train, dev and test sets alike.
            ansProDict[cid] = ansProInstance.getCategoryPro(qcategory)
            tfidfDict[cid] = tfidfInstance.getTfidfScore(cid)
            urlDict[cid] = hasUrlInstance.isExistUrl(cid)

            with open(join(path, each), "r") as fin:
                s2 = fin.read()

            # Some questions & comments are empty after preprocessing; store
            # the placeholder instead of computing a cosine for them.
            if not s1 or not s2:
                bowDict[each] = emptyScore
                w2vDict[each] = emptyScore
                tmDict[each] = emptyScore
                continue

            bow = BOW(s1, s2)
            v1, v2 = bow.getVector()
            bowDict[each] = utility.cosine(v1, v2)

            vec2 = w2v.sentenceVector(s2)
            w2vDict[each] = utility.cosine(vec1, vec2)

            t2 = tm.getProbability(each)
            tmDict[each] = utility.cosine(t1, t2)

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("bowDict, w2vDict, tmDict done!")

    # Every comment that reached the similarity step has an entry in all
    # dictionaries, so bowDict's keys drive the final assembly.
    for key in bowDict:
        aList = [bowDict[key], w2vDict[key], tmDict[key], cuserComQuser[key]]
        aList.extend(ansProDict[key])
        aList.append(tfidfDict[key])
        aList.append(urlDict[key])
        resultDict[key] = aList

    print("resultDict done!")
    return resultDict
示例2: main
# 需要导入模块: from utility import Utility [as 别名]
# 或者: from utility.Utility import cosine [as 别名]
def main(originalFile, w2vFile, w2vDimension, topicModelFile, topicModelDimension, infoInstance, tfidfInstance, hasUrlInstance, ansProInstance, ynInstance):
    """Build one feature list per selected comment file under ``originalFile``.

    Every sub-directory of ``originalFile`` is processed as one question:
    the file inside it whose name equals the directory name is read as the
    question text, and every other regular file is read as a comment (its
    file name is used as the comment id). Only comments whose id appears in
    ``ynInstance.getCidList()`` are processed; as soon as a comment file that
    is not in that list is met, the remaining files of that directory are
    skipped.

    For each processed comment the returned list contains, in this order:

    1. ``Utility.cosine`` of the two vectors returned by ``BOW(s1, s2)``
    2. ``Utility.cosine`` of the W2V sentence vectors of question and comment
    3. ``Utility.cosine`` of the ``TopicModel.getProbability`` results
    4. ``1.0`` if the comment user id equals the question user id, else ``0.0``
    5. every element of ``ansProInstance.getCategoryPro(category)``
    6. ``tfidfInstance.getTfidfScore(cid)``
    7. ``hasUrlInstance.isExistUrl(cid)``

    When the question text or the comment text is empty, items 1-3 are set
    to the placeholder ``0.000000000001`` and no cosine is computed.

    Args:
        originalFile: Directory that contains one sub-directory per question.
            It may be given with or without a trailing path separator.
        w2vFile, w2vDimension: Forwarded unchanged to ``W2V(...)``.
        topicModelFile, topicModelDimension: Forwarded unchanged to
            ``TopicModel(...)``.
        infoInstance: Object providing ``cidToCuserid``, ``cidToQuserid``
            and ``qidToCategory``.
        tfidfInstance: Object providing ``getTfidfScore``.
        hasUrlInstance: Object providing ``isExistUrl``.
        ansProInstance: Object providing ``getCategoryPro``.
        ynInstance: Object providing ``getCidList``, the ids of the comments
            to process.

    Returns:
        dict mapping each processed comment file name to its feature list.
    """
    # Placeholder similarity stored when either text is empty.
    emptyScore = 0.000000000001

    # Only membership is ever tested, so a set replaces the original
    # {cid: 0} dictionary.
    wantedCids = set(ynInstance.getCidList())

    bowDict = {}
    w2vDict = {}
    tmDict = {}
    cuserComQuser = {}  # cid -> 1.0 / 0.0, comment user compared with question user
    ansProDict = {}     # cid -> sequence returned by getCategoryPro for the category
    tfidfDict = {}      # cid -> tfidf score
    urlDict = {}        # cid -> result of isExistUrl
    resultDict = {}

    utility = Utility()
    w2v = W2V(w2vFile, w2vDimension)
    tm = TopicModel(topicModelFile, topicModelDimension)

    questionDirs = [f for f in listdir(originalFile) if isdir(join(originalFile, f))]
    for directory in questionDirs:
        # Use join() like the directory filter above does: plain string
        # concatenation only works if originalFile ends with a separator.
        path = join(originalFile, directory)
        fileList = [f for f in listdir(path) if isfile(join(path, f))]

        # Question file: named exactly like its directory.
        with open(join(path, directory), "r") as fin:
            s1 = fin.read()
        vec1 = w2v.sentenceVector(s1)
        t1 = tm.getProbability(directory)

        # Comment files: everything else in the directory.
        for each in fileList:
            if each == directory:
                continue
            # NOTE(review): `break` abandons the rest of this directory at
            # the first unlisted comment, so the outcome depends on listdir()
            # order if a directory mixes listed and unlisted comments.
            # Presumably a question's comments are either all listed or all
            # unlisted -- confirm, otherwise this should be `continue`.
            if each not in wantedCids:
                break
            qid = directory
            cid = each

            cuserid = infoInstance.cidToCuserid(cid)
            quserid = infoInstance.cidToQuserid(cid)
            qcategory = infoInstance.qidToCategory(qid)
            cuserComQuser[cid] = 1.0 if cuserid == quserid else 0.0

            ansProDict[cid] = ansProInstance.getCategoryPro(qcategory)
            tfidfDict[cid] = tfidfInstance.getTfidfScore(cid)
            urlDict[cid] = hasUrlInstance.isExistUrl(cid)

            with open(join(path, each), "r") as fin:
                s2 = fin.read()

            # Some questions & comments are empty after preprocessing; store
            # the placeholder instead of computing a cosine for them.
            if not s1 or not s2:
                bowDict[each] = emptyScore
                w2vDict[each] = emptyScore
                tmDict[each] = emptyScore
                continue

            bow = BOW(s1, s2)
            v1, v2 = bow.getVector()
            bowDict[each] = utility.cosine(v1, v2)

            vec2 = w2v.sentenceVector(s2)
            w2vDict[each] = utility.cosine(vec1, vec2)

            t2 = tm.getProbability(each)
            tmDict[each] = utility.cosine(t1, t2)

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("bowDict, w2vDict, tmDict done!")

    # Every comment that reached the similarity step has an entry in all
    # dictionaries, so bowDict's keys drive the final assembly.
    for key in bowDict:
        aList = [bowDict[key], w2vDict[key], tmDict[key], cuserComQuser[key]]
        aList.extend(ansProDict[key])
        aList.append(tfidfDict[key])
        aList.append(urlDict[key])
        resultDict[key] = aList

    print("resultDict done!")
    return resultDict