This article collects typical usage examples of the Python method enchant.checker.SpellChecker.get_text. If you have been wondering what SpellChecker.get_text does, how to call it, or what real-world uses of it look like, the hand-picked code examples here may help. You can also explore further usage examples of the class this method belongs to, enchant.checker.SpellChecker.
The following presents 10 code examples of the SpellChecker.get_text method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
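Before turning to the examples, here is a minimal sketch of the pattern almost all of them share: set the text, iterate over the detected errors, apply a replacement for each one, and read the corrected text back with get_text. The "en_US" dictionary tag and the sample sentence are illustrative assumptions only, not taken from any example below.

from enchant.checker import SpellChecker

chkr = SpellChecker("en_US")                        # assumes the en_US dictionary is installed
chkr.set_text("this sentnce has a speling error")   # hypothetical input text
for err in chkr:                                    # iterate over the misspelled words
    suggestions = err.suggest()
    if suggestions:                                 # skip words with no suggestions
        err.replace(suggestions[0])
print(chkr.get_text())                              # the text with all replacements applied

The ten examples below are variations on this loop; they differ mainly in where the text comes from and in how each replacement is chosen.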
Example 1: spellChecker
# Required import: from enchant.checker import SpellChecker [as alias]
# Or: from enchant.checker.SpellChecker import get_text [as alias]
class spellChecker(object):
    def __init__(self, text, dictionary):
        super(spellChecker, self).__init__()
        log.debug("Creating the SpellChecker object. Dictionary: %s" % (dictionary,))
        self.active = True
        try:
            if config.app["app-settings"]["language"] == "system":
                log.debug("Using the system language")
                self.checker = SpellChecker(languageHandler.curLang, filters=[tokenize.EmailFilter, tokenize.URLFilter])
            else:
                log.debug("Using language: %s" % (languageHandler.getLanguage(),))
                self.checker = SpellChecker(languageHandler.curLang, filters=[tokenize.EmailFilter, tokenize.URLFilter])
            self.checker.set_text(text)
        except DictNotFoundError:
            print("no dict")
            log.exception("Dictionary for language %s not found." % (dictionary,))
            wx_ui.dict_not_found_error()
            self.active = False
        if self.active:
            log.debug("Creating dialog...")
            self.dialog = wx_ui.spellCheckerDialog()
            widgetUtils.connect_event(self.dialog.ignore, widgetUtils.BUTTON_PRESSED, self.ignore)
            widgetUtils.connect_event(self.dialog.ignoreAll, widgetUtils.BUTTON_PRESSED, self.ignoreAll)
            widgetUtils.connect_event(self.dialog.replace, widgetUtils.BUTTON_PRESSED, self.replace)
            widgetUtils.connect_event(self.dialog.replaceAll, widgetUtils.BUTTON_PRESSED, self.replaceAll)
            self.check()
            self.dialog.get_response()
            self.fixed_text = self.checker.get_text()

    def check(self):
        try:
            next(self.checker)
            textToSay = _(u"Misspelled word: %s") % (self.checker.word,)
            context = u"... %s %s %s" % (self.checker.leading_context(10), self.checker.word, self.checker.trailing_context(10))
            self.dialog.set_title(textToSay)
            output.speak(textToSay)
            self.dialog.set_word_and_suggestions(word=self.checker.word, context=context, suggestions=self.checker.suggest())
        except StopIteration:
            log.debug("Process finished.")
            wx_ui.finished()
            self.dialog.Destroy()

    def ignore(self, ev):
        self.check()

    def ignoreAll(self, ev):
        self.checker.ignore_always(word=self.checker.word)
        self.check()

    def replace(self, ev):
        self.checker.replace(self.dialog.get_selected_suggestion())
        self.check()

    def replaceAll(self, ev):
        self.checker.replace_always(self.dialog.get_selected_suggestion())
        self.check()

    def clean(self):
        if hasattr(self, "dialog"):
            self.dialog.Destroy()
Example 2: correct
# Required import: from enchant.checker import SpellChecker [as alias]
# Or: from enchant.checker.SpellChecker import get_text [as alias]
def correct(text):
    chkr = SpellChecker("en_US")
    chkr.set_text(text)
    for err in chkr:
        sug = err.suggest()
        if sug:
            err.replace(sug[0])
    return chkr.get_text()
Example 3: spellcheck
# Required import: from enchant.checker import SpellChecker [as alias]
# Or: from enchant.checker.SpellChecker import get_text [as alias]
def spellcheck(sentence):
    checker = SpellChecker("en_US")
    checker.set_text(sentence)
    for error in checker:
        for suggestion in error.suggest():
            if error.word.replace(' ', '') == suggestion.replace(' ', ''):
                error.replace(suggestion)
                break
    return checker.get_text()
Example 4: suggest_correction
# Required import: from enchant.checker import SpellChecker [as alias]
# Or: from enchant.checker.SpellChecker import get_text [as alias]
def suggest_correction(file_path):
    """Return a string with the spell-corrected content of
    the file specified by file_path.
    """
    with open(file_path, "r") as file_to_check:
        data = file_to_check.read()
    checker = SpellChecker("en_US")
    checker.set_text(data)
    for err in checker:
        suggestions = err.suggest()
        if suggestions:  # avoid an IndexError when there are no suggestions
            err.replace(suggestions[0])
    return checker.get_text()
Example 5: spellchecker
# Required import: from enchant.checker import SpellChecker [as alias]
# Or: from enchant.checker.SpellChecker import get_text [as alias]
def spellchecker(data):
    tweet = []
    for row in data:
        val = row.split("\t")
        if val[0] == 'Finding0###':
            tweet.append(val[1].strip('\n'))
    tweets = pd.DataFrame()
    tweets['text'] = tweet
    for i in tweets['text']:
        text = i
        chkr = SpellChecker("en_US", text)
        for err in chkr:
            print("{0}\t{1}".format("Findingres6###", err.word + " at position " + str(err.wordpos)))  # <----
            err.replace("SPAM")
        print("Text replaced by spam at wrong spelling or words")
        t = chkr.get_text()
        print("\n" + t)  # <----
Example 6: cleaner
# Required import: from enchant.checker import SpellChecker [as alias]
# Or: from enchant.checker.SpellChecker import get_text [as alias]
def cleaner(tweet):
    """Correct the errors in the misspelled words."""
    # Assumes `re` is imported at module level in the original script.
    clean_text = tweet
    # Faster than TextBlob's spell checker in an informal timing (0.32 s vs 0.85 s).
    from enchant.checker import SpellChecker
    chkr = SpellChecker("en_GB")  # set "en_US" to use the US dictionary instead
    chkr.set_text(clean_text)
    for err in chkr:
        if len(err.word) > 3 and re.search(u'\u2019', err.word) is None:
            # Python 2-style str/unicode handling: normalise the word to ASCII
            # before asking for suggestions.
            text = err.word
            text = text.decode('latin-1')
            if isinstance(text, str):
                text = str(text.decode('ascii', 'ignore'))
            else:
                text = text.encode('ascii', 'ignore')
            err.replace_always(chkr.suggest(text)[0])
    clean_text = chkr.get_text()
    return clean_text
Example 7: zip
# Required import: from enchant.checker import SpellChecker [as alias]
# Or: from enchant.checker.SpellChecker import get_text [as alias]
# This excerpt assumes a SpellChecker instance `chkr`, the `commentID` and `comments`
# sequences, the accumulator lists (ERROR_WORDS, SUGGEST_WORDS, POSTCOMMENTID, COMMENTS,
# ERROR_WORDS_LIST, SUGGEST_WORDS_LIST) and an `import itertools` defined earlier in the script.
zipped_1 = zip(commentID, comments)
for Id, text in zipped_1:
    chkr.set_text(text)
    for error in chkr:
        ERROR_WORDS.append(str(error.word))
        SUGGEST_WORDS.append(str(error.suggest()))
        print(str(error.word))
    if ERROR_WORDS:
        POSTCOMMENTID.append(str(Id))
        COMMENTS.append(str(chkr.get_text()))
        ERROR_WORDS_LIST.append(ERROR_WORDS)
        SUGGEST_WORDS_LIST.append(SUGGEST_WORDS)
        ERROR_WORDS = []
        SUGGEST_WORDS = []

def find_suggested_words_repeated_letters(word):
    # Group consecutive identical letters and keep the runs of length >= 2.
    repeated_letters = []
    for i in list(''.join(s) for _, s in itertools.groupby(word)):
        if len(i) >= 2:
            repeated_letters.append(str(i))
    main_list = []
    combination_list = []
Example 8: open
# Required import: from enchant.checker import SpellChecker [as alias]
# Or: from enchant.checker.SpellChecker import get_text [as alias]
from enchant.checker import SpellChecker
import json
import pandas as pd
import matplotlib.pyplot as plt

"""Reading the file"""
tweets_data_path = 'namo.json'
tweets_data = []
tweets_file = open(tweets_data_path, "r")
for line in tweets_file:
    try:
        tweet = json.loads(line)
        tweets_data.append(tweet)
    except ValueError:
        continue

"""Tweet data collection from json"""
tweets = pd.DataFrame()
tweets['text'] = [tweet.get('text', '') for tweet in tweets_data]
for i in tweets['text']:
    text = i
    chkr = SpellChecker("en_US", text)
    for err in chkr:
        print(err.word + " at position " + str(err.wordpos))  # <----
        err.replace("SPAM")
    print("Text replaced by spam at wrong spelling or words")
    t = chkr.get_text()
    print("\n" + t)  # <----
Example 9: wxSpellCheckerDialog
# Required import: from enchant.checker import SpellChecker [as alias]
# Or: from enchant.checker.SpellChecker import get_text [as alias]
import wx
from enchant.checker import SpellChecker
from enchant.checker.wxSpellCheckerDialog import wxSpellCheckerDialog

# Retrieve the text to be checked
text = "this is some smple text with a few erors in it"
print("[INITIAL TEXT:]", text)

# Need to have an App before any windows will be shown
app = wx.PySimpleApp()  # wx.PySimpleApp was removed in wxPython 4; use wx.App() there

# Construct the dialog, and the SpellChecker it is to use
dlg = wxSpellCheckerDialog(None)
chkr = SpellChecker("en_US", text)
dlg.SetSpellChecker(chkr)

# Display the dialog, allowing user interaction
if dlg.ShowModal() == wx.ID_OK:
    # Checking completed successfully
    # Retrieve the modified text
    print("[FINAL TEXT:]", chkr.get_text())
else:
    # Checking was cancelled
    print("[CHECKING CANCELLED]")
Example 10: checker
# Required import: from enchant.checker import SpellChecker [as alias]
# Or: from enchant.checker.SpellChecker import get_text [as alias]
# This excerpt assumes module-level imports (nltk, re, TreebankWordTokenizer) and helpers
# (MySpellChecker, stanford_parser, important_words, maleantecedents) defined elsewhere
# in the original script.
def checker(f, outf, thefilename):
    if str(f).find(".txt") == -1:
        return  # We are not looking at a proper file
    # print("Reading: " + str(f))
    text = f.read()
    subverbagg_err = 0
    spellerrors = 0
    # verb counting vars for entire doc
    doc_vps_average = 0.0
    nmv_error = 0
    mvt_error = 0
    vps_average = 0.0
    my_spell_checker = MySpellChecker(max_dist=1)
    chkr = SpellChecker("en_US", text)
    for err in chkr:
        # print(err.word + " at position " + str(err.wordpos))
        err.replace(my_spell_checker.replace(err.word))
        spellerrors = spellerrors + 1
    t = chkr.get_text()
    # print("\n" + t)

    ## Useless code?
    # print("Trying to print semantics")
    # test_results = nltk.sem.util.interpret_sents(t, 'grammars/large_grammars/commandtalk.cfg')
    # for result in test_results:
    #     for (synrep, semrep) in result:
    #         print(synrep)

    ## This divides it into a per-sentence item list.
    sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
    sentencearray = sent_detector.tokenize(t.strip())
    sentencecount = len(sentencearray)
    # print('\n-----\n'.join(sentencearray))

    ## This splits everything up into separate words.
    tokenized = TreebankWordTokenizer().tokenize(t)
    # print(tokenized)

    ## Display spell errors found.
    # print("Spellerrors: " + str(spellerrors))
    # print("Words: " + str(len(tokenized)))
    # print("Sentences: " + str(len(sentencearray)))
    prevsentence = 0

    # Stanford parser pass
    frag_count = 0
    iw_count = 0
    stanford_sentences_tree = stanford_parser.raw_parse_sents(sentencearray)
    for stanford_sentences in stanford_sentences_tree:
        for stanford_sentence in stanford_sentences:
            frag_match = re.search("FRAG", str(stanford_sentence))
            iw_match = re.findall(important_words, str(stanford_sentence))
            iw_count = iw_count + len(iw_match)
            # print(str(stanford_sentence))
            # print('-----------------------------------')
            if frag_match:
                frag_count = frag_count + 1

    for sentence in sentencearray:
        tokenized_sentence = TreebankWordTokenizer().tokenize(sentence)
        numwords = len(tokenized_sentence)
        pos_tagged_sentence = nltk.pos_tag(tokenized_sentence)
        # print(pos_tagged_sentence)
        if prevsentence == 0:
            prevsentence = pos_tagged_sentence
        ## We make use of 'prevsentence' to work with section 2a, as we expect these two sentences to be 'connected'.
        worksentence = prevsentence + pos_tagged_sentence
        # 2a
        # 1. First person singular pronouns and possessive adjectives (I, me, my, mine) refer to the
        #    speaker/writer, are resolved based on who the speaker is, and are not ambiguous. The same holds
        #    for first person plural pronouns (we, our), although they are harder to interpret since they
        #    refer to a "group" that includes the speaker. Second person pronouns (you, your) can be used
        #    as well, in an impersonal sense, as in the following example from the excerpt of the "high"
        #    essay included in the first part of the project: ... going to the places you choose to go to and
        #    discovering everything on your own.
        ### I.e., we ignore: I, me, my, mine, you, your.
        # 2. Third person singular pronouns are hardly used in these essays. Double-check if they are. If
        #    you find a he or she you can quickly assess whether it is used properly: any third person
        #    pronoun should have a possible antecedent. If she is used and no feminine entity has been
        #    introduced, then she is wrong (see below a note on where to find the information about gender
        #    and number); likewise for he and male antecedents.
        ### Female antecedents: mother, aunt, sister, niece
        ### Male antecedents: father, uncle, brother, nephew
        wronggenderantecedent = 0
        for x in range(0, len(worksentence)):
            teststring = ""
            if worksentence[x][0] == "he":
                wronggenderantecedent = wronggenderantecedent + 1
                for walker in range(0, x):
                    teststring += worksentence[walker][0]
                for each in maleantecedents:
                    if teststring.find(each) != -1:
                        wronggenderantecedent = wronggenderantecedent - 1
                        break
# ......... part of the code is omitted here .........