本文整理汇总了 Python 中 SearchModule.sanitize_html 方法的典型用法代码示例。如果您正苦于以下问题：Python SearchModule.sanitize_html 方法的具体用法？Python SearchModule.sanitize_html 怎么用？Python SearchModule.sanitize_html 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 SearchModule 的用法示例。
在下文中一共展示了 SearchModule.sanitize_html 方法的 2 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: summary_results
# 需要导入模块: import SearchModule [as 别名]
# 或者: from SearchModule import sanitize_html [as 别名]
def summary_results(rawResults, strsearch, logic_items=None, results_stats=None):
    """Merge per-provider search results into one list and flag each entry.

    Every result dict with a non-None 'title' is updated in place: its
    'title' is passed through SearchModule.sanitize_html, 'provid' is set to
    the index of its provider in ``rawResults``, and 'ignore' is set to one of:

    * 0 -- the title matches the search,
    * 1 -- it matches, but a later result has the same sanitized title and
      a 'size' differing by less than 5000000 (near-duplicate),
    * 2 -- it does not match the search or was rejected by ``logic_items``.

    A title matches when every dot-separated term of the sanitized search
    string is either one of the title's dot-separated tokens or a prefix of
    one of them.

    Args:
        rawResults: sequence with one sequence of result dicts per provider.
            Each dict is read for 'title', 'size' and 'providertitle'.
        strsearch: the search string as typed by the user.
        logic_items: optional sequence of ``(sign, term)`` pairs. A '+' term
            must appear among the title tokens of a result (at least one of
            the '+' terms), a '-' term must not.
        results_stats: optional dict that is filled, per provider title,
            with ``[number of raw results, number of results not flagged 2]``.

    Returns:
        The merged list of result dicts (flagged entries included), in
        provider order.
    """
    # Fresh containers per call: the former mutable defaults were shared by
    # every call, so provider stats leaked from one search into the next.
    if logic_items is None:
        logic_items = []
    if results_stats is None:
        results_stats = {}

    results = []
    titles = []
    sptitle_collection = []

    # ~ stats for each provider: [raw result count, accepted count]
    for provider_results in rawResults:
        if provider_results:
            provider_name = str(provider_results[0]['providertitle'])
            results_stats[provider_name] = [len(provider_results), 0]

    # ~ all in one array (results without a title are dropped)
    for provid, provider_results in enumerate(rawResults):
        for item in provider_results:
            if item['title'] is None:
                continue
            item['title'] = SearchModule.sanitize_html(item['title'])
            item['provid'] = provid
            title = SearchModule.sanitize_strings(item['title'])
            titles.append(title)
            sptitle_collection.append(set(title.split(".")))
            results.append(item)

    strsearch1 = SearchModule.sanitize_strings(strsearch)
    strsearch1_collection = set(strsearch1.split("."))

    # rcount[k] counts the results flagged with ignore == k.
    rcount = [0] * 3
    for z, result in enumerate(results):
        title_terms = sptitle_collection[z]
        # ~ relax the search: a search term that is not an exact title token
        # is still accepted when some title token starts with it.
        missing_terms = strsearch1_collection.difference(title_terms)
        matched = all(
            any(token.startswith(term) for token in title_terms)
            for term in missing_terms
        )
        result['ignore'] = 0 if matched else 2

        if matched:
            # Flag this entry as a duplicate when a later result carries the
            # same sanitized title and a nearly identical size.
            # NOTE(review): 'size' is presumably in bytes -- confirm.
            for v in range(z + 1, len(results)):
                if titles[z] != titles[v]:
                    continue
                sz1 = float(result['size'])
                sz2 = float(results[v]['size'])
                if abs(sz1 - sz2) < 5000000:
                    result['ignore'] = 1
                    break  # one near-duplicate is enough

        # ~ stats
        rcount[result['ignore']] += 1

    # ~ logic params
    exclude_coll = set()
    include_coll = set()
    for item in logic_items:
        if item[0] == '-':
            exclude_coll.add(item[1])
        if item[0] == '+':
            include_coll.add(item[1])

    if include_coll or exclude_coll:
        for z, result in enumerate(results):
            if result['ignore'] >= 2:
                continue
            title_terms = sptitle_collection[z]
            lacks_included = bool(include_coll) and not include_coll.intersection(title_terms)
            has_excluded = bool(exclude_coll.intersection(title_terms))
            if lacks_included or has_excluded:
                result['ignore'] = 2

    # The logged counters were taken before the include/exclude filtering.
    mssg = 'Overall search stats: [%s] [%s] %s %s %s' % (
        strsearch1, strsearch, rcount[0], rcount[1], rcount[2])
    log.info(mssg)

    # Count, per provider, the results that survived all filters.
    for result in results:
        if result['ignore'] != 2:
            results_stats[str(result['providertitle'])][1] += 1
    return results
示例2: summary_results
# 需要导入模块: import SearchModule [as 别名]
# 或者: from SearchModule import sanitize_html [as 别名]
def summary_results(rawResults, strsearch, logic_items=None):
    """Merge per-provider search results into one list and flag each entry.

    Every merged result dict is updated in place: its 'title' is passed
    through SearchModule.sanitize_html and 'ignore' is set to one of:

    * 0 -- every dot-separated term of the sanitized search string is one
      of the title's dot-separated tokens,
    * 1 -- as above, but a later result has the same sanitized title and a
      'size' differing by less than 5000000 (near-duplicate),
    * 2 -- the title does not match or was rejected by ``logic_items``.

    Args:
        rawResults: sequence with one sequence of result dicts per provider.
            Each dict is read for 'title' and 'size'.
        strsearch: the search string as typed by the user.
        logic_items: optional sequence of ``(sign, term)`` pairs. A '+' term
            must appear among the title tokens of a result (at least one of
            the '+' terms), a '-' term must not.

    Returns:
        The merged list of result dicts (flagged entries included), in
        provider order.
    """
    # Avoid a mutable default argument shared between calls.
    if logic_items is None:
        logic_items = []

    results = []
    titles = []
    sptitle_collection = []

    # ~ all in one array
    for provider_results in rawResults:
        for item in provider_results:
            # Skip results without a title instead of handing None to the
            # sanitizers. NOTE(review): assumes the SearchModule helpers
            # expect a string -- confirm.
            if item["title"] is None:
                continue
            item["title"] = SearchModule.sanitize_html(item["title"])
            title = SearchModule.sanitize_strings(item["title"])
            titles.append(title)
            sptitle_collection.append(set(title.split(".")))
            results.append(item)

    strsearch1 = SearchModule.sanitize_strings(strsearch)
    strsearch1_collection = set(strsearch1.split("."))

    # rcount[k] counts the results flagged with ignore == k.
    rcount = [0] * 3
    for z, result in enumerate(results):
        # A title matches only when it contains every search term as a token.
        matched = strsearch1_collection.issubset(sptitle_collection[z])
        result["ignore"] = 0 if matched else 2

        if matched:
            # Flag this entry as a duplicate when a later result carries the
            # same sanitized title and a nearly identical size.
            # NOTE(review): 'size' is presumably in bytes -- confirm.
            for v in range(z + 1, len(results)):
                if titles[z] != titles[v]:
                    continue
                sz1 = float(result["size"])
                sz2 = float(results[v]["size"])
                if abs(sz1 - sz2) < 5000000:
                    result["ignore"] = 1
                    break  # one near-duplicate is enough

        # ~ stats
        rcount[result["ignore"]] += 1

    # ~ logic params
    exclude_coll = set()
    include_coll = set()
    for item in logic_items:
        if item[0] == "-":
            exclude_coll.add(item[1])
        if item[0] == "+":
            include_coll.add(item[1])

    if include_coll or exclude_coll:
        for z, result in enumerate(results):
            if result["ignore"] >= 2:
                continue
            title_terms = sptitle_collection[z]
            lacks_included = bool(include_coll) and not include_coll.intersection(title_terms)
            has_excluded = bool(exclude_coll.intersection(title_terms))
            if lacks_included or has_excluded:
                result["ignore"] = 2

    # The reported counters were taken before the include/exclude filtering.
    mssg = "[%s] [%s] %s %s %s" % (
        strsearch1, strsearch, rcount[0], rcount[1], rcount[2])
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(mssg)
    log.info(mssg)
    return results