当前位置: 首页>>代码示例>>Python>>正文


Python SearchModule.sanitize_html方法代码示例

本文整理汇总了Python中SearchModule.sanitize_html方法的典型用法代码示例。如果您正苦于以下问题:Python SearchModule.sanitize_html方法的具体用法?Python SearchModule.sanitize_html怎么用?Python SearchModule.sanitize_html使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在SearchModule的用法示例。


在下文中一共展示了SearchModule.sanitize_html方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: summary_results

# 需要导入模块: import SearchModule [as 别名]
# 或者: from SearchModule import sanitize_html [as 别名]
def summary_results(rawResults, strsearch, logic_items=None, results_stats=None):
    """Flatten per-provider search results and flag each one via ``'ignore'``.

    Every result that has a title gets its ``'title'`` replaced by
    ``SearchModule.sanitize_html(title)``, a ``'provid'`` key holding the index
    of its provider inside ``rawResults``, and an ``'ignore'`` key:

    * ``0`` -- every search term matches the title; the result is kept.
    * ``1`` -- matches, but a later result has the same sanitized title and a
      ``'size'`` closer than 5000000; treated as a duplicate.
    * ``2`` -- does not match the search, or was rejected by ``logic_items``.

    A search term matches when it equals one of the dot-separated tokens of
    the sanitized title or, failing that, when it is a prefix of one of them.

    Args:
        rawResults: sequence with one list of result dicts per provider. The
            dicts are modified in place. Keys read here: ``'title'``,
            ``'providertitle'`` and ``'size'``.
        strsearch: the search string as typed by the user.
        logic_items: optional sequence of ``(sign, term)`` pairs. With at
            least one ``'+'`` pair a result must contain one of those terms
            as a title token; any ``'-'`` term found among the title tokens
            rejects the result.
        results_stats: optional dict filled in place with
            ``{provider title: [raw result count, results with ignore != 2]}``.

    Returns:
        The flat list of result dicts that have a title (entries whose title
        is ``None`` are left out), in provider order.
    """
    # Fresh containers per call: the former ``[]`` / ``{}`` defaults were
    # shared between calls, so stats from one search leaked into the next.
    if logic_items is None:
        logic_items = []
    if results_stats is None:
        results_stats = {}

    results = []
    titles = []
    title_tokens = []

    # ~ stats for each provider: [raw hits, hits kept]
    for provider_results in rawResults:
        if provider_results:
            provider_key = str(provider_results[0]['providertitle'])
            results_stats[provider_key] = [len(provider_results), 0]

    # ~ all in one array
    for provid, provider_results in enumerate(rawResults):
        for entry in provider_results:
            if entry['title'] is None:
                continue
            entry['title'] = SearchModule.sanitize_html(entry['title'])
            entry['provid'] = provid
            title = SearchModule.sanitize_strings(entry['title'])
            titles.append(title)
            title_tokens.append(set(title.split(".")))
            results.append(entry)

    strsearch1 = SearchModule.sanitize_strings(strsearch)
    search_terms = set(strsearch1.split("."))

    # Two results with the same title whose sizes differ by less than this are
    # considered duplicates (unit presumably bytes -- confirm with providers).
    size_tolerance = 5000000

    # rcount[k] counts the results whose 'ignore' value is k after matching.
    rcount = [0] * 3
    for z, entry in enumerate(results):
        tokens = title_tokens[z]
        exact_hits = search_terms.intersection(tokens)
        if len(exact_hits) == len(search_terms):
            matched = True
        else:
            # ~ relax the search: each term without an exact token match must
            # ~ at least be the prefix of some title token.
            missing_terms = search_terms.difference(exact_hits)
            matched = all(
                any(token.startswith(term) for token in tokens)
                for term in missing_terms
            )
        entry['ignore'] = 0 if matched else 2

        if matched:
            # Only the earlier copy is flagged, so the last occurrence of a
            # duplicated title is the one that stays visible.
            for v in range(z + 1, len(results)):
                if titles[z] != titles[v]:
                    continue
                size_gap = abs(float(entry['size']) - float(results[v]['size']))
                if size_gap < size_tolerance:
                    entry['ignore'] = 1
                    break  # verdict is final, no need to scan further

        # ~ stats
        rcount[entry['ignore']] += 1

    # ~ logic params
    exclude_coll = set()
    include_coll = set()
    for item in logic_items:
        if item[0] == '-':
            exclude_coll.add(item[1])
        if item[0] == '+':
            include_coll.add(item[1])

    if include_coll or exclude_coll:
        for entry, tokens in zip(results, title_tokens):
            if entry['ignore'] >= 2:
                continue
            if include_coll and not include_coll.intersection(tokens):
                entry['ignore'] = 2
            elif exclude_coll and exclude_coll.intersection(tokens):
                entry['ignore'] = 2

    # The logged counters are taken before the +/- filters above are applied.
    mssg = ('Overall search stats: [' + strsearch1 + ']' + ' [' + strsearch + '] '
            + str(rcount[0]) + ' ' + str(rcount[1]) + ' ' + str(rcount[2]))
    log.info(mssg)

    # ~ per-provider count of results that survived every filter
    for entry in results:
        if entry['ignore'] != 2:
            results_stats[str(entry['providertitle'])][1] += 1
    return results
开发者ID:etomm,项目名称:usntssearch,代码行数:98,代码来源:megasearch.py

示例2: summary_results

# 需要导入模块: import SearchModule [as 别名]
# 或者: from SearchModule import sanitize_html [as 别名]
def summary_results(rawResults, strsearch, logic_items=None):
    """Flatten per-provider search results and flag each one via ``"ignore"``.

    Every result that has a title gets its ``"title"`` replaced by
    ``SearchModule.sanitize_html(title)`` and an ``"ignore"`` key:

    * ``0`` -- every search term is one of the dot-separated tokens of the
      sanitized title; the result is kept.
    * ``1`` -- matches, but a later result has the same sanitized title and a
      ``"size"`` closer than 5000000; treated as a duplicate.
    * ``2`` -- does not match the search, or was rejected by ``logic_items``.

    Args:
        rawResults: sequence with one list of result dicts per provider. The
            dicts are modified in place. Keys read here: ``"title"`` and
            ``"size"``.
        strsearch: the search string as typed by the user.
        logic_items: optional sequence of ``(sign, term)`` pairs. With at
            least one ``"+"`` pair a result must contain one of those terms
            as a title token; any ``"-"`` term found among the title tokens
            rejects the result.

    Returns:
        The flat list of result dicts that have a title, in provider order.
    """
    # Avoid a mutable default argument shared between calls.
    if logic_items is None:
        logic_items = []

    results = []
    titles = []
    title_tokens = []

    # ~ all in one array
    for provider_results in rawResults:
        for entry in provider_results:
            # Entries without a title cannot be sanitized or matched: skip them.
            if entry["title"] is None:
                continue
            entry["title"] = SearchModule.sanitize_html(entry["title"])
            title = SearchModule.sanitize_strings(entry["title"])
            titles.append(title)
            title_tokens.append(set(title.split(".")))
            results.append(entry)

    strsearch1 = SearchModule.sanitize_strings(strsearch)
    search_terms = set(strsearch1.split("."))

    # Two results with the same title whose sizes differ by less than this are
    # considered duplicates (unit presumably bytes -- confirm with providers).
    size_tolerance = 5000000

    # rcount[k] counts the results whose "ignore" value is k after matching.
    rcount = [0] * 3
    for z, entry in enumerate(results):
        exact_hits = search_terms.intersection(title_tokens[z])
        matched = len(exact_hits) == len(search_terms)
        entry["ignore"] = 0 if matched else 2

        if matched:
            # Only the earlier copy is flagged, so the last occurrence of a
            # duplicated title is the one that stays visible.
            for v in range(z + 1, len(results)):
                if titles[z] != titles[v]:
                    continue
                size_gap = abs(float(entry["size"]) - float(results[v]["size"]))
                if size_gap < size_tolerance:
                    entry["ignore"] = 1
                    break  # verdict is final, no need to scan further

        # ~ stats
        rcount[entry["ignore"]] += 1

    # ~ logic params
    exclude_coll = set()
    include_coll = set()
    for item in logic_items:
        if item[0] == "-":
            exclude_coll.add(item[1])
        if item[0] == "+":
            include_coll.add(item[1])

    if include_coll or exclude_coll:
        for entry, tokens in zip(results, title_tokens):
            if entry["ignore"] >= 2:
                continue
            if include_coll and not include_coll.intersection(tokens):
                entry["ignore"] = 2
            elif exclude_coll and exclude_coll.intersection(tokens):
                entry["ignore"] = 2

    # The reported counters are taken before the +/- filters above are applied.
    mssg = (
        "[" + strsearch1 + "]" + " [" + strsearch + "] "
        + str(rcount[0]) + " " + str(rcount[1]) + " " + str(rcount[2])
    )
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(mssg)
    log.info(mssg)

    return results
开发者ID:rocksie67,项目名称:usntssearch,代码行数:77,代码来源:megasearch.py


注:本文中的SearchModule.sanitize_html方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。