This article collects typical usage examples of the edit_distance function from Python's nltk.metrics.distance module. If you have been wondering what edit_distance does, how to call it, and what real-world uses look like, the curated examples below should help.
Below, 15 code examples of edit_distance are shown, sorted by popularity by default.
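Before the examples, a minimal sketch of the call itself. edit_distance computes the Levenshtein distance between any two sequences, so both raw strings (character-level) and token lists (word-level, the pattern used in Examples 1-4) are valid inputs:

from nltk.metrics.distance import edit_distance

# Character-level: 3 edits turn "kitten" into "sitting"
print(edit_distance("kitten", "sitting"))  # 3

# Word-level, on token lists (as in Examples 1-4 below)
print(edit_distance("the big cat".split(), "the cat".split()))  # 1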
Example 1: process_rel_candidate_for_drop_led
def process_rel_candidate_for_drop_led(relnode_candidate, filtered_mod_pos, nodeset, simple_sentences, main_sent_dict, boxer_graph, opr_drop_rel):
    simple_sentence = " ".join(simple_sentences)
    # Token-level edit distance to the simple reference, before the drop
    sentence_before_drop = boxer_graph.extract_main_sentence(nodeset, main_sent_dict, filtered_mod_pos)
    edit_dist_before_drop = edit_distance(sentence_before_drop.split(), simple_sentence.split())
    # ... and after dropping the relation node
    temp_nodeset, temp_filtered_mod_pos = boxer_graph.drop_relation(nodeset, relnode_candidate, filtered_mod_pos)
    sentence_after_drop = boxer_graph.extract_main_sentence(temp_nodeset, main_sent_dict, temp_filtered_mod_pos)
    edit_dist_after_drop = edit_distance(sentence_after_drop.split(), simple_sentence.split())
    isDrop = compare_edit_distance(opr_drop_rel, edit_dist_after_drop, edit_dist_before_drop)
    return isDrop
Example 2: process_ood_candidate_for_drop_led
def process_ood_candidate_for_drop_led(oodnode_candidate, filtered_mod_pos, nodeset, simple_sentences, main_sent_dict, boxer_graph, opr_drop_ood):
    simple_sentence = " ".join(simple_sentences)
    sentence_before_drop = boxer_graph.extract_main_sentence(nodeset, main_sent_dict, filtered_mod_pos)
    edit_dist_before_drop = edit_distance(sentence_before_drop.split(), simple_sentence.split())
    # Drop the out-of-domain node and re-measure the distance
    temp_nodeset = nodeset[:]
    temp_nodeset.remove(oodnode_candidate)
    sentence_after_drop = boxer_graph.extract_main_sentence(temp_nodeset, main_sent_dict, filtered_mod_pos)
    edit_dist_after_drop = edit_distance(sentence_after_drop.split(), simple_sentence.split())
    isDrop = compare_edit_distance(opr_drop_ood, edit_dist_after_drop, edit_dist_before_drop)
    return isDrop
Example 3: make_compatible
def make_compatible(input_str):
    # Snap recognized entity strings onto known catalogue entries
    # (relies on module-level rer_out and allprods).
    for i in range(len(rer_out['taglist'])):
        if rer_out['taglist'][i] == "Org":
            for j in allprods:
                if dist.edit_distance(rer_out['wordlist'][i], j) < 2:
                    rer_out['wordlist'][i] = j
                    break
        if rer_out['taglist'][i] == "Family":
            for j in allprods:
                for k in allprods[j]:
                    if dist.edit_distance(rer_out['wordlist'][i], k) < 4:
                        rer_out['wordlist'][i] = k
                        break
Example 4: process_mod_candidate_for_drop_led
def process_mod_candidate_for_drop_led(modcand_to_process, filtered_mod_pos, nodeset, simple_sentences, main_sent_dict, boxer_graph, opr_drop_mod):
    simple_sentence = " ".join(simple_sentences)
    sentence_before_drop = boxer_graph.extract_main_sentence(nodeset, main_sent_dict, filtered_mod_pos)
    edit_dist_before_drop = edit_distance(sentence_before_drop.split(), simple_sentence.split())
    # Filter out the modifier position and re-measure the distance
    modcand_position_to_process = modcand_to_process[0]
    temp_filtered_mod_pos = filtered_mod_pos[:] + [modcand_position_to_process]
    sentence_after_drop = boxer_graph.extract_main_sentence(nodeset, main_sent_dict, temp_filtered_mod_pos)
    edit_dist_after_drop = edit_distance(sentence_after_drop.split(), simple_sentence.split())
    isDrop = compare_edit_distance(opr_drop_mod, edit_dist_after_drop, edit_dist_before_drop)
    return isDrop
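Examples 1, 2 and 4 all delegate the final decision to a compare_edit_distance helper that is not shown on this page. The following is a hypothetical reconstruction, not the original code, assuming opr_drop_* is a gold operation label ("drop" or "keep") and that a drop is accepted when it moves the sentence no further from the simple reference:

def compare_edit_distance(operation, dist_after, dist_before):
    # Hypothetical sketch of the unshown helper
    if operation == "drop":
        # Accept the drop if it does not increase the distance to the reference
        return dist_after <= dist_before
    else:
        # For a "keep"-style operation, drop only if it strictly helps
        return dist_after < dist_before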
Example 5: one2ArrEditDistance
def one2ArrEditDistance(self, sen, arr):
    # Mean edit distance between sen and every string in arr
    score = []
    for l in arr:
        score.append(edit_distance(sen, l))
    if len(score) != 0:
        return sum(score) * 1.0 / len(score)
    return 0
Example 6: mean_char_edit_distance
def mean_char_edit_distance(candidates, references):
    total_distance = 0
    total_target_length = 0
    for y, t in zip(candidates, references):
        total_distance += edit_distance(y, t)
        total_target_length += len(t)
    return total_distance / total_target_length
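A quick illustrative call (the numbers are worked by hand; true division, as in Python 3, is assumed):

candidates = ["kitten", "flaw"]
references = ["sitting", "lawn"]
# edit_distance("kitten", "sitting") = 3, edit_distance("flaw", "lawn") = 2
# mean = (3 + 2) / (7 + 4), roughly 0.45
print(mean_char_edit_distance(candidates, references))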
Example 7: levenshtein_sort
def levenshtein_sort(self, keyword, domains):
    """
    Sort domains by Levenshtein edit distance from a keyword
    :param keyword: str input source
    :param domains: domains list
    :rtype: list
    :return: sorted names list
    """
    # Distance helper; transpositions=True counts "ab" -> "ba" as one edit
    distance = lambda s, d: edit_distance(s, d, transpositions=True)
    # Strip the zone (TLD) before comparing
    get_str = lambda domain: re.sub('([.][a-z]{2,4})+$', '', domain)
    domains = map(get_str, domains)
    # Bubble sort by distance to the keyword
    for i in range(len(domains)):
        for j in range(len(domains) - 1):
            if (distance(keyword, get_str(domains[j])) >
                    distance(keyword, get_str(domains[j + 1]))):
                tmp = copy(domains[j + 1])
                domains[j + 1] = domains[j]
                domains[j] = tmp
    return domains
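Note the transpositions=True flag above: by default nltk counts an adjacent swap as two edits, and the flag reduces it to one (Damerau-style):

from nltk.metrics.distance import edit_distance

print(edit_distance("ab", "ba"))                       # 2 (two substitutions)
print(edit_distance("ab", "ba", transpositions=True))  # 1 (one swap)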
Example 8: select_anagrams
def select_anagrams(token, structures):
    """Select possible anagrams for a given token

    Parameters:
        token (:func:`str`): Cleaned token
        structures (:func:`dict`): Datastructures from file

    Returns:
        :func:`dict` - Possible anagrams (keys) along with their score (values)
    """
    anagrams = {}
    focus_alphabet = generate_alphabet_from_word(token[1])
    token_hash = anagram_hash(token)

    hash_list = []
    for c in structures["alphabet"]:
        for f in focus_alphabet:
            hash_list.append(token_hash + c - f)

    hash_counter = Counter(hash_list)  # Counting retrieval occurrence

    for h in set(hash_counter.keys()).intersection(set(structures["anagrams"].keys())):
        count = hash_counter[h]
        # Keep only anagram candidates within edit distance 3 of the token
        anag_list = [anag for anag in structures["anagrams"][h] if edit_distance(anag, token) <= 3]

        for anag in anag_list:
            anag_score = rate_anagram(structures["occurence_map"], token, anag, count)
            if anag_score > 0:
                anagrams[anag] = anag_score

    return anagrams
Example 9: __init__
def __init__(self):
    self.stemmer = LancasterStemmer()
    self.stem_mapping = {}
    self.stemmed_trie = TrieNode()
    self.trie = TrieNode()
    self.singles_lst = []
    self.black_listed_stems = set([])
    loaded = cPickle.load(open(DICTIONARY, 'r'))
    print len(loaded)
    loaded += CUSTOM
    loaded = set(loaded)
    most_common = cPickle.load(open(MOST_COMMON, 'r'))
    # Stems of the most common words are blacklisted as correction targets
    for word in most_common:
        self.black_listed_stems.add(self.stem(word))
    for word in loaded:
        word = word.lower()
        if word not in most_common[:TOP_K_FILTER]:
            self.trie.insert(word)
            stemmed_word = self.stem(word)
            if stemmed_word in self.stem_mapping:
                previous = self.stem_mapping[stemmed_word]
                edist = distance.edit_distance(word, previous)
                if edist > 2:
                    # Two very different words collide on one stem;
                    # word is dropped in favor of previous
                    pass
            else:
                if stemmed_word not in self.black_listed_stems:
                    self.stem_mapping[stemmed_word] = word
                    self.stemmed_trie.insert(stemmed_word)
Example 10: get_candidates
def get_candidates(self, word, D=1):
    """If word is in the lexicon, returns [(word, 1.0)].
    Otherwise returns all words in the lexicon whose Levenshtein
    edit distance to word is at most D.
    If there is no such word, returns [(word, 0.0)].
    """
    word = word.lower()
    if word in self.fdist:
        return [(word, 1.0)]
    candidates = []
    counts = []
    for w, c in self.fdist.iteritems():
        if edit_distance(w, word) <= D:
            candidates.append(w)
            counts.append(c)
    if len(candidates) == 0:
        candidates.append(word)
        counts.append(0)
    # Rank candidates by their relative frequency in the lexicon
    probs = [float(c) / self.wcount for c in counts]
    return sorted(zip(candidates, probs), key=lambda x: x[1], reverse=True)
Example 11: replace
def replace(self, word):
    suggestions = self.spell_dict.suggest(word)
    if suggestions:
        for suggestion in suggestions:
            if edit_distance(word, suggestion) <= self.max_dist:
                # Return the first suggestion within the distance bound
                return suggestion
    return word
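The spell_dict in Example 11 exposes a suggest() method, which matches the pyenchant API. A self-contained sketch of the surrounding class under that assumption (the class name and constructor are hypothetical; replace() mirrors the method above):

import enchant  # pyenchant
from nltk.metrics.distance import edit_distance

class SpellingReplacer(object):  # hypothetical host class
    def __init__(self, dict_name='en_US', max_dist=2):
        self.spell_dict = enchant.Dict(dict_name)
        self.max_dist = max_dist

    def replace(self, word):
        suggestions = self.spell_dict.suggest(word)
        if suggestions:
            for suggestion in suggestions:
                if edit_distance(word, suggestion) <= self.max_dist:
                    return suggestion
        return word

replacer = SpellingReplacer()
print(replacer.replace('cookbok'))  # likely 'cookbook'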
Example 12: get_geonames_code
def get_geonames_code(m):
    lat = session.scalar(m._geo_ponto.y)
    lon = session.scalar(m._geo_ponto.x)
    places = geonames_reverse(lat, lon)
    for place in places:
        nome1 = m.nome.strip().lower()
        nome2 = place[u'name'].strip().lower()
        # Accept the match only if the names are nearly identical
        if edit_distance(nome1, nome2) < 2:
            return int(place[u'geonameId'])
Example 13: eval
def eval(references):
    string_distances = {'siddharthan': [], 'bayes_no_variation': [], 'bayes_variation': []}
    jaccard_distances = {'siddharthan': [], 'bayes_no_variation': [], 'bayes_variation': []}
    for reference in references:
        print reference
        string_distances['siddharthan'].append(edit_distance(reference['original'], reference['siddharthan']))
        string_distances['bayes_no_variation'].append(edit_distance(reference['original'], reference['bayes_no_variation']))
        string_distances['bayes_variation'].append(edit_distance(reference['original'], reference['bayes_variation']))
        # jaccard_distances['siddharthan'].append(jaccard_distance(reference['original'], reference['siddharthan']))
        # jaccard_distances['bayes_no_variation'].append(jaccard_distance(reference['original'], reference['bayes_no_variation']))
        # jaccard_distances['bayes_variation'].append(jaccard_distance(reference['original'], reference['bayes_variation']))

    print 'String distances: '
    print 'siddharthan: ', mean_confidence_interval(string_distances['siddharthan'])
    print 'bayes_no_variation: ', mean_confidence_interval(string_distances['bayes_no_variation'])
    print 'bayes_variation: ', mean_confidence_interval(string_distances['bayes_variation'])
    print 10 * '-'
Example 14: close_enough_buckets
def close_enough_buckets(first_bucket, second_bucket, dist):
    if first_bucket == second_bucket:
        return False
    elif edit_distance(first_bucket, second_bucket) <= dist:
        return True
    else:
        return False
Example 15: strip_synonyms
def strip_synonyms(output_set, exclude_set):
    # Remove synonyms that have a Levenshtein distance of 1
    # (trivial variants), AFTER removing plurals.
    for word in output_set:
        for synset in wn.synsets(word):
            for synonym in synset.lemma_names():
                if edit_distance(word, synonym) == 1:
                    exclude_set.add(synonym)
    output_set.difference_update(exclude_set)
    return output_set, exclude_set
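The == 1 test is what catches trivial variants such as plural/singular pairs; for example:

from nltk.metrics.distance import edit_distance

print(edit_distance("cat", "cats"))    # 1 -> "cats" would be excluded
print(edit_distance("cat", "feline"))  # 6 -> kept as a genuine synonym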