当前位置: 首页>>代码示例>>Python>>正文


Python NGram.compare方法代码示例

本文整理汇总了 Python 中 ngram.NGram.compare 方法的典型用法代码示例。如果您正苦于以下问题:Python NGram.compare 方法的具体用法?Python NGram.compare 怎么用?Python NGram.compare 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 ngram.NGram 的用法示例。


在下文中一共展示了NGram.compare方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: ngram_similarity

# 需要导入模块: from ngram import NGram [as 别名]
# 或者: from ngram.NGram import compare [as 别名]
def ngram_similarity(univ_name):
    """Find the first known university name that resembles *univ_name*.

    Each row of ``static/UniqueFBUnivNames.csv`` is stringified, stripped of
    non-alphanumeric characters and compared, lower-cased, against the
    lower-cased query.

    Returns a dict. ``'score'`` holds the N=1 similarity of the last row
    examined. When a row's default similarity exceeds 0.5 the search stops
    and the dict also carries ``'score_used'`` (that similarity) and
    ``'univ'`` (the cleaned row text). The dict is empty when the file has
    no rows.
    """
    out = {}
    # Lower-case the query once so every comparison is case-insensitive and
    # the reported scores are the ones actually tested against the threshold.
    target = str(univ_name).lower()
    # NOTE(review): 'rb' suits the Python 2 csv module; Python 3's csv.reader
    # expects a text-mode file -- confirm the target interpreter.
    with open("static/UniqueFBUnivNames.csv", 'rb') as f:
        for row in csv.reader(f):
            cleaned = re.sub('[^A-Za-z0-9 ]+', ' ', str(row))
            cleaned = re.sub('  ', ' ', cleaned)
            candidate = cleaned.lower()
            out['score'] = NGram.compare(candidate, target, N=1)
            score = NGram.compare(candidate, target)
            if score > 0.5:
                out['score_used'] = score
                out['univ'] = cleaned
                return out
    return out
开发者ID:MysteriousMagics,项目名称:NLPCareerTrajectory,代码行数:15,代码来源:univ_lookup.py

示例2: verify

# 需要导入模块: from ngram import NGram [as 别名]
# 或者: from ngram.NGram import compare [as 别名]
	def verify(self,text_compare):
		"""Report how closely the file *text_compare* matches the stored text.

		Every line of the file that is not a bare newline is looked up with
		``self.ng.items_sharing_ngrams`` and compared with ``NGram.compare``
		against each entry of ``self.lsn`` (presumably the paragraphs of the
		reference text -- confirm against the class). Two summaries are
		printed: the highest shared-n-gram count per line, and the average of
		the best similarity found for each line.

		A file without any non-blank line yields an average of 0.0 instead of
		a division by zero.
		"""
		results = []
		dictio = []
		# The context manager closes the file even if a comparison raises.
		with open(text_compare,"r") as file2:
			for linea2 in file2:
				if linea2 == '\n':
					continue
				dictio.append(self.ng.items_sharing_ngrams(linea2))
				# Best similarity of this line against any stored entry,
				# never lower than 0.0.
				results.append(max([0.0] + [NGram.compare(parrafo,linea2) for parrafo in self.lsn]))

		# Largest shared-n-gram count for each processed line (0 if none).
		major_ocurrences = [max([0] + list(d.values())) for d in dictio]

		avg_perc = sum(results)/len(results) if results else 0.0

		print("Mayor numero de ocurrencias por parrafo del texto copia: "+repr(major_ocurrences))
		print("Porcentaje Similitud: "+repr(avg_perc))
开发者ID:elard28,项目名称:plagiarism-ngram,代码行数:35,代码来源:init.py

示例3: simtitle

# 需要导入模块: from ngram import NGram [as 别名]
# 或者: from ngram.NGram import compare [as 别名]
def simtitle( request ):
    """Calculate similarity based on title and a naive threshold.

    Walks up to 1000 live articles ordered by publication date, resets and
    saves each one's duplicate flags, then looks at the first earlier
    article whose title search score is at least 0.4. If there is none the
    article is listed as a result; otherwise it is marked as a duplicate of
    that match (or of the match's own original), unless the original's title
    similarity is below 0.7, in which case it is listed as a result instead.
    Renders ``dump.html`` with the collected results.
    """
    index = NGram( warp=WARP, iconv=enrich, key=lambda x: x.title )
    articles = Article.objects.filter( status = "live" ).order_by( "date_published" )[:1000]
    results = []
    for article in articles:
        article.is_duplicate = False
        article.duplicate_of = None
        article.save()

        # Only the first sufficiently similar hit is ever considered.
        candidates = ( hit for hit in index.search( article.title ) if hit[1] >= 0.4 )
        first_hit = next( candidates, None )

        if first_hit is None:
            results.append( article )
        else:
            nearest = first_hit[0]
            too_different = False
            if nearest.is_duplicate:
                # Follow the duplicate link back to the original article.
                nearest = nearest.duplicate_of
                too_different = NGram.compare( article.title, nearest.title ) < 0.7
            if too_different:
                results.append( article )
            else:
                article.is_duplicate = True
                article.duplicate_of = nearest
                article.save()

        index.add( article )
    return render( request, "dump.html", dictionary = { "article_list": results, } )
开发者ID:mrmonkington,项目名称:channelfunnel,代码行数:27,代码来源:views.py

示例4: compare_ngrams

# 需要导入模块: from ngram import NGram [as 别名]
# 或者: from ngram.NGram import compare [as 别名]
def compare_ngrams(left, right, N=2, pad_len=0):
    """Return the n-gram similarity of *left* and *right* after ``ascii()``.

    Both inputs are passed through ``ascii`` first. When each result is a
    single character the answer is 1.0 for equal characters and 0.0
    otherwise; every other case is delegated to ``NGram.compare`` with the
    given *N* and *pad_len*.
    """
    left, right = ascii(left), ascii(right)
    both_single = len(left) == 1 and len(right) == 1
    if not both_single:
        return NGram.compare(left, right, N=N, pad_len=pad_len)
    # NGram.compare returns 0.0 for a one-letter comparison even when the
    # letters are equal, so that case is decided here.
    if left == right:
        return 1.0
    return 0.0
开发者ID:digideskio,项目名称:addok,代码行数:10,代码来源:text.py

示例5: test_ngram_search

# 需要导入模块: from ngram import NGram [as 别名]
# 或者: from ngram.NGram import compare [as 别名]
    def test_ngram_search(self):
        """Check index search and pairwise comparison against fixed scores.

        The cases come from the original ngram.py, to check that the rewrite
        still uses the same underlying algorithm.
        """
        index = NGram(self.items)

        # Searching the index for an exact item.
        expected_exact = [
            ('askfjwehiuasdfji', 1.0),
            ('asdfawe', 0.17391304347826086),
            ('asfwef', 0.083333333333333329),
            ('adfwe', 0.041666666666666664),
        ]
        self.assertEqual(index.search('askfjwehiuasdfji'), expected_exact)

        # Only the two leading results are checked for this query.
        expected_leading = [
            ('adfwe', 0.59999999999999998),
            ('asdfawe', 0.20000000000000001),
        ]
        self.assertEqual(index.search('afadfwe')[:2], expected_leading)

        # Pairwise comparison of strings.
        for other, expected in (('sdfeff', 1.0), ('zzzzzz', 0.0)):
            self.assertEqual(NGram.compare('sdfeff', other), expected)
开发者ID:DavidBrear,项目名称:python-ngram,代码行数:20,代码来源:test_ngram.py

示例6: cumulative_score_strings

# 需要导入模块: from ngram import NGram [as 别名]
# 或者: from ngram.NGram import compare [as 别名]
def cumulative_score_strings(iline, jline, N):
    """Sum the ``NGram.compare`` scores of two lines for orders N down to 1.

    Only the first three `` :::: ``-separated fields of each line take part,
    joined by single spaces. Returns 0 when *N* is below 1.
    """
    separator = " :::: "
    first = " ".join(iline.split(separator)[:3])
    second = " ".join(jline.split(separator)[:3])

    total = 0
    order = N
    while order >= 1:
        total += NGram.compare(first, second, N=order)
        order -= 1
    return total
开发者ID:mayhewsw,项目名称:HMMClustering,代码行数:12,代码来源:clusteralgorithm.py

示例7: backoff_score_strings

# 需要导入模块: from ngram import NGram [as 别名]
# 或者: from ngram.NGram import compare [as 别名]
def backoff_score_strings(iline, jline, N, T=0.0):
    """Return the first ``NGram.compare`` score above *T*, backing off from N.

    Only the first three `` :::: ``-separated fields of each line take part,
    joined by single spaces. Orders N, N-1, ... 1 are tried in turn until a
    score exceeds *T*; the last score computed is returned, or -1 when no
    comparison was made (N below 1, or *T* below -1).
    """
    separator = " :::: "
    first = " ".join(iline.split(separator)[:3])
    second = " ".join(jline.split(separator)[:3])

    order = N
    score = -1
    while score <= T and order >= 1:
        score = NGram.compare(first, second, N=order)
        order -= 1
    return score
开发者ID:mayhewsw,项目名称:HMMClustering,代码行数:12,代码来源:clusteralgorithm.py

示例8: guess_image

# 需要导入模块: from ngram import NGram [as 别名]
# 或者: from ngram.NGram import compare [as 别名]
def guess_image(name):
    '''
    Guess which meme image is meant by picking the image whose alias has the
    greatest ngram similarity to the tokenized name.

    Returns the key from IMAGES with the best-scoring alias (the earliest one
    on ties), or '404' when IMAGES offers no alias at all.
    '''
    name = tokenize(name)
    best, best_score = '404', None
    for image, aliases in IMAGES.iteritems():
        for alias in aliases:
            score = NGram.compare(alias, name)
            if best_score is None or score > best_score:
                best, best_score = image, score
    app.logger.info('Pick image %s for name "%s"' % (best, name))
    return best
开发者ID:jason-feng,项目名称:hss-urlmeme,代码行数:18,代码来源:url.py

示例9: guess_meme_image

# 需要导入模块: from ngram import NGram [as 别名]
# 或者: from ngram.NGram import compare [as 别名]
def guess_meme_image(meme_name):
    '''
    Guess which meme image is meant by picking the image whose tokenized
    alias has the greatest ngram similarity to the tokenized name.

    Every improvement over the best score so far is logged. Returns the key
    from MEMES with the best-scoring alias (the earliest one on ties), or ''
    when MEMES offers no alias at all.
    '''
    meme_name = tokenize(meme_name)
    best, best_score = '', None
    for image, aliases in MEMES.items():
        for raw_alias in aliases:
            alias = tokenize(raw_alias)
            score = NGram.compare(alias, meme_name)
            improved = best_score is None or score > best_score
            if improved:
                best, best_score = image, score
                app.logger.info('New best meme for "%s": "%s" (Score: %s)', meme_name, alias, score)
    app.logger.info('Picked meme "%s" for name "%s"' % (best, meme_name))
    return best
开发者ID:jasonbot,项目名称:urlmeme,代码行数:20,代码来源:url.py

示例10: smart_read

# 需要导入模块: from ngram import NGram [as 别名]
# 或者: from ngram.NGram import compare [as 别名]
def smart_read(url):
    """Print the text that *url* adds relative to its most similar sibling page.

    The page is fetched and every ``href`` inside ``<body>`` that points to
    the same network location is scored by the ``NGram.compare`` similarity
    of its path to the page's own path. The best-scoring link with a score
    strictly between 0 and 1 is taken as the "mirror"; the bodies returned by
    ``visit_page`` for both pages are diffed with ``htmldiff`` and the text
    found inside ``<ins>`` elements is printed.

    Returns None. Nothing is printed when no same-domain link qualifies as a
    mirror.
    """
    resp = urllib2.urlopen(url)
    try:
        # resolve url: continue with the address reported by the response
        url = resp.url
        html = resp.read()
    finally:
        # Release the connection whether or not reading succeeded.
        resp.close()

    target = urlparse(url)
    domain = target.netloc
    path = target.path

    tree = etree.parse(StringIO.StringIO(html), parser)
    links = tree.xpath("//body//@href")
    nmax = 0
    mirror = None
    for link in links:
        parsed = urlparse(link)
        if parsed.netloc == domain:
            ng = NGram.compare(parsed.path, path)
            # A score of 1 is excluded so an identical path is never chosen.
            if nmax < ng < 1:
                nmax = ng
                mirror = link
    if mirror is None:
        # No comparable page on the same domain: there is nothing to diff.
        return

    diffh = htmldiff(visit_page(url)["body"], visit_page(mirror)["body"])
    tree = etree.parse(StringIO.StringIO(diffh), parser)
    diff = tree.xpath("//ins//text()")
    for d in diff:
        print(d)
开发者ID:justzx2011,项目名称:Robottke,代码行数:25,代码来源:smartread.py

示例11: process

# 需要导入模块: from ngram import NGram [as 别名]
# 或者: from ngram.NGram import compare [as 别名]
def process(hr,sr,he,se):
    """Score extracted categories against relevant ones via ``Evaluator``.

    Entries of *hr* and *he* are paired whenever their ``NGram.compare``
    similarity exceeds 0.95. For every pair the matching *sr* entry and the
    matching *se* entry (with carriage returns and newlines removed) are
    collected and handed to ``Evaluator(...).evaluate_using_ngrams(3)``.

    NOTE(review): presumably hr/sr are the relevant headers/sections and
    he/se the extracted ones -- confirm against the caller.

    Returns the ``(p, r)`` pair produced by the evaluator, or ``(0, 0)`` when
    *he* is non-empty and its length differs from that of *se*.
    """
    # Reject inconsistent input before doing the pairwise comparisons.
    if he and len(he) != len(se):
        return 0 , 0

    category_idx_list = [
        (i, j)
        for i, relevant in enumerate(hr)
        for j, extracted in enumerate(he)
        if NGram.compare(relevant, extracted) > 0.95
    ]

    categories_relevant = {}
    categories_extracted = {}
    for idx, (rel_pos, ext_pos) in enumerate(category_idx_list):
        categories_relevant[idx] = sr[rel_pos]
        categories_extracted[idx] = se[ext_pos].replace('\r', '').replace('\n','')

    e = Evaluator(categories_relevant, categories_extracted)
    p, r = e.evaluate_using_ngrams(3)

    return p, r
开发者ID:adityamarella,项目名称:frontierproject,代码行数:25,代码来源:evaluate.py

示例12: sim

# 需要导入模块: from ngram import NGram [as 别名]
# 或者: from ngram.NGram import compare [as 别名]
 def sim( a, b ):
     """Return one minus the NGram.compare score of the two objects' titles.

     The comparison is made with the WARP and enrich settings.
     """
     similarity = NGram.compare( a.title, b.title, warp=WARP, iconv=enrich )
     return 1 - similarity
开发者ID:mrmonkington,项目名称:channelfunnel,代码行数:4,代码来源:views.py

示例13: SPARQLWrapper

# 需要导入模块: from ngram import NGram [as 别名]
# 或者: from ngram.NGram import compare [as 别名]
# Run the duplicate-label query and print every pair of entries whose object
# values are more than 0.8 similar but whose subjects differ.
query = dup_label

# Set up
pp = pprint.PrettyPrinter(indent=2)
sparql = SPARQLWrapper("http://husky-big.cs.uwaterloo.ca:8890/sparql")

# Send query
sparql.setQuery(query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()
entries = results["results"]["bindings"]
# Disabled debug dump of every (subject, object) pair:
# for result in entries:
#     pp.pprint((result['s']['value'], result['o']["value"]))

# Approximately 20k entries
print(len(entries), " entries retrieved")

for (indexA, entryA) in enumerate(entries):
    for (indexB, entryB) in enumerate(entries):
        # Visit each unordered pair once; skip the mirrored half.
        if indexA > indexB:
            continue
        distance = NGram.compare(entryA['o']['value'], entryB['o']['value'])
        if not (distance > 0.8):
            continue
        if entryA['s']['value'] == entryB['s']['value']:
            continue
        pp.pprint((
            distance,
            entryA['s']['value'],
            entryA['o']['value'],
            entryB['s']['value'],
            entryB['o']['value'],
        ))





开发者ID:Disiok,项目名称:uroc-dedup,代码行数:27,代码来源:query.py

示例14: nearlySameText

# 需要导入模块: from ngram import NGram [as 别名]
# 或者: from ngram.NGram import compare [as 别名]
def nearlySameText(text_1, text_2):
    """Return True when the stripped texts score at least 0.9 in NGram.compare."""
    first = text_1.strip()
    second = text_2.strip()
    similarity = NGram.compare(first, second)
    return similarity >= 0.9
开发者ID:xiachaolun,项目名称:scienceograhy,代码行数:4,代码来源:tool.py

示例15: ng_pare

# 需要导入模块: from ngram import NGram [as 别名]
# 或者: from ngram.NGram import compare [as 别名]
def ng_pare(needle='default', fn='/usr/share/dict/words', pth=.50):
    """Collect the lower-cased lines of *fn* that are similar enough to *needle*.

    Each line is lower-cased and compared with ``NGram.compare(..., N=1)``;
    it is kept when the score minus *pth* is not negative. Returns a set of
    the kept lower-cased lines.

    NOTE(review): lines are used as read, so any trailing newline takes part
    in the comparison and stays in the returned strings -- confirm that this
    is intended.
    """
    ng_haystack = set()
    with open(fn, 'r') as fh:
        for line in fh:
            lowered = line.lower()
            if NGram.compare(needle, lowered, N=1) - pth >= 0.0:
                ng_haystack.add(lowered)
    return ng_haystack
开发者ID:psaintlaurent,项目名称:StringMatchingTest,代码行数:8,代码来源:StringMatchTest.py


注:本文中的ngram.NGram.compare方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。