当前位置: 首页>>代码示例>>Python>>正文


Python SpellChecker.get_text方法代码示例

本文整理汇总了Python中enchant.checker.SpellChecker.get_text方法的典型用法代码示例。如果您正苦于以下问题:Python SpellChecker.get_text方法的具体用法?Python SpellChecker.get_text怎么用?Python SpellChecker.get_text使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在enchant.checker.SpellChecker的用法示例。


在下文中一共展示了SpellChecker.get_text方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: spellChecker

# 需要导入模块: from enchant.checker import SpellChecker [as 别名]
# 或者: from enchant.checker.SpellChecker import get_text [as 别名]
class spellChecker(object):
	"""Interactive spell-checking session driver.

	Wraps an enchant ``SpellChecker`` and a ``wx_ui`` dialog, walking the
	user through every misspelled word in *text*.  Once the dialog is
	dismissed, the corrected text is available as ``self.fixed_text``.
	"""

	def __init__(self, text, dictionary):
		super(spellChecker, self).__init__()
		log.debug("Creating the SpellChecker object. Dictionary: %s" % (dictionary,))
		self.active = True
		try:
			# NOTE(review): both branches construct the checker from
			# languageHandler.curLang; the else branch presumably should use
			# the configured language (or the `dictionary` parameter) — confirm.
			if config.app["app-settings"]["language"] == "system":
				log.debug("Using the system language")
				self.checker = SpellChecker(languageHandler.curLang, filters=[tokenize.EmailFilter, tokenize.URLFilter])
			else:
				log.debug("Using language: %s" % (languageHandler.getLanguage(),))
				self.checker = SpellChecker(languageHandler.curLang, filters=[tokenize.EmailFilter, tokenize.URLFilter])
			self.checker.set_text(text)
		except DictNotFoundError:
			# Parenthesized form prints identically on Python 2 and is valid
			# Python 3 (original used the Py2-only `print "no dict"` statement).
			print("no dict")
			log.exception("Dictionary for language %s not found." % (dictionary,))
			wx_ui.dict_not_found_error()
			self.active = False
		if self.active:
			log.debug("Creating dialog...")
			self.dialog = wx_ui.spellCheckerDialog()
			widgetUtils.connect_event(self.dialog.ignore, widgetUtils.BUTTON_PRESSED, self.ignore)
			widgetUtils.connect_event(self.dialog.ignoreAll, widgetUtils.BUTTON_PRESSED, self.ignoreAll)
			widgetUtils.connect_event(self.dialog.replace, widgetUtils.BUTTON_PRESSED, self.replace)
			widgetUtils.connect_event(self.dialog.replaceAll, widgetUtils.BUTTON_PRESSED, self.replaceAll)
			self.check()
			# get_response() blocks until the user closes the dialog.
			self.dialog.get_response()
			self.fixed_text = self.checker.get_text()

	def check(self):
		"""Advance to the next misspelling; finish and tear down when done."""
		try:
			self.checker.next()
			textToSay = _(u"Misspelled word: %s") % (self.checker.word,)
			context = u"... %s %s %s" % (self.checker.leading_context(10), self.checker.word, self.checker.trailing_context(10))
			self.dialog.set_title(textToSay)
			output.speak(textToSay)
			self.dialog.set_word_and_suggestions(word=self.checker.word, context=context, suggestions=self.checker.suggest())
		except StopIteration:
			# enchant raises StopIteration when no misspellings remain.
			log.debug("Process finished.")
			wx_ui.finished()
			self.dialog.Destroy()

	def ignore(self, ev):
		# Skip this single occurrence.
		self.check()

	def ignoreAll(self, ev):
		# Skip every occurrence of the current word.
		self.checker.ignore_always(word=self.checker.word)
		self.check()

	def replace(self, ev):
		# Replace this occurrence with the suggestion selected in the dialog.
		self.checker.replace(self.dialog.get_selected_suggestion())
		self.check()

	def replaceAll(self, ev):
		# Replace every occurrence with the selected suggestion.
		self.checker.replace_always(self.dialog.get_selected_suggestion())
		self.check()

	def clean(self):
		"""Destroy the dialog if it was ever created."""
		if hasattr(self, "dialog"):
			self.dialog.Destroy()
开发者ID:manuelcortez,项目名称:socializer,代码行数:62,代码来源:spellchecker.py

示例2: correct

# 需要导入模块: from enchant.checker import SpellChecker [as 别名]
# 或者: from enchant.checker.SpellChecker import get_text [as 别名]
def correct(text):
	"""Return *text* with each misspelled word replaced by enchant's
	first suggestion; words with no suggestions are left untouched."""
	checker = SpellChecker("en_US")
	checker.set_text(text)
	for mistake in checker:
		suggestions = mistake.suggest()
		if suggestions:
			mistake.replace(suggestions[0])
	return checker.get_text()
开发者ID:llhtran,项目名称:senior_project,代码行数:10,代码来源:gen_to_data.py

示例3: spellcheck

# 需要导入模块: from enchant.checker import SpellChecker [as 别名]
# 或者: from enchant.checker.SpellChecker import get_text [as 别名]
def spellcheck(sentence):
	"""Fix spacing-only misspellings in *sentence*.

	Accepts the first suggestion that equals the misspelled word once all
	spaces are stripped from both (e.g. joins words split by a stray space).
	"""
	chk = SpellChecker("en_US")
	chk.set_text(sentence)
	for mistake in chk:
		squeezed = mistake.word.replace(' ','')
		for candidate in mistake.suggest():
			if squeezed == candidate.replace(' ',''):
				mistake.replace(candidate)
				break
	return chk.get_text()
开发者ID:manalsali,项目名称:SentimentClassification,代码行数:11,代码来源:transcript.py

示例4: suggest_correction

# 需要导入模块: from enchant.checker import SpellChecker [as 别名]
# 或者: from enchant.checker.SpellChecker import get_text [as 别名]
def suggest_correction(file_path):
    """Return a string with the spell-corrected content of the file at
    *file_path*.

    Each misspelled word is replaced by enchant's first suggestion; words
    for which enchant has no suggestions are left unchanged.
    """
    with open(file_path, "r") as file_to_check:
        data = file_to_check.read()
    checker = SpellChecker("en_US")
    checker.set_text(data)
    for err in checker:
        # Guard against an empty suggestion list: the original comment said
        # "avoid IndexOutOfBounds" but `checker.suggest()[0]` still raised
        # IndexError whenever enchant had no candidates for a word.
        suggestions = err.suggest()
        if suggestions:
            err.replace(suggestions[0])
    return checker.get_text()
开发者ID:AnalysisBots,项目名称:runtime,代码行数:14,代码来源:spell_check_bot.py

示例5: spellchecker

# 需要导入模块: from enchant.checker import SpellChecker [as 别名]
# 或者: from enchant.checker.SpellChecker import get_text [as 别名]
def spellchecker(data):
    """Spell-check tweet texts extracted from tab-separated *data* rows.

    Rows whose first field is 'Finding0###' contribute their second field
    as tweet text.  Every misspelled word is reported and replaced by the
    literal "SPAM", and the rewritten text is printed.
    """
    tweet_texts = []
    for row in data:
        fields = row.split("\t")
        if fields[0] == 'Finding0###':
            tweet_texts.append(fields[1].strip('\n'))
    tweets = pd.DataFrame()
    # Original was `[tweet for tweet in tweet]` — a no-op copy that also
    # shadowed the list variable; assign the list directly.
    tweets['text'] = tweet_texts

    for text in tweets['text']:
        chkr = SpellChecker("en_US", text)
        for err in chkr:
            print("{0}\t{1}".format("Findingres6###",err.word + " at position " + str(err.wordpos)))  #<----
            err.replace("SPAM")
        print("Text replaced by spam at  wrong spelling or words")
        t = chkr.get_text()
        print("\n" + t)  #<----
开发者ID:NeelimaDatla,项目名称:Big_Data_Project,代码行数:20,代码来源:newreducer.py

示例6: cleaner

# 需要导入模块: from enchant.checker import SpellChecker [as 别名]
# 或者: from enchant.checker.SpellChecker import get_text [as 别名]
def cleaner(tweet):
	"""Return *tweet* with misspelled words auto-corrected.

	Uses enchant's British-English dictionary and replaces every
	misspelling longer than three characters (that does not contain a
	right single quote, U+2019) with the checker's first suggestion.
	Python 2 only: relies on byte-string ``decode`` and the str/unicode
	split.
	"""
	clean_text = tweet
	
	#better performance in time elaboration in opposition with the spellchecker of the textblob (0.32s vs 0.85s)
	from enchant.checker import SpellChecker
	chkr = SpellChecker("en_GB") #you can set "en_US" to use the us dictionary
	chkr.set_text(clean_text)
	#print clean_text
	for err in chkr:
		# Skip short words and words containing a typographic apostrophe.
		if (len(err.word)>3 and re.search(u'\u2019',err.word)==None):
			#print (err.word+" "+chkr.suggest(err.word)[0])
			text = err.word
			# Decode the byte string, then strip non-ASCII characters.
			text = text.decode('latin-1')
			# NOTE(review): on Python 2, .decode() returns unicode, so the
			# isinstance(text, str) branch below appears unreachable —
			# confirm whether the re-decode path was ever intended to run.
			if isinstance(text, str):
				text = str(text.decode('ascii', 'ignore'))
			else:
				text = text.encode('ascii', 'ignore')
			# NOTE(review): suggest(...)[0] raises IndexError when enchant
			# has no suggestions for the word — no guard here.
			err.replace_always(chkr.suggest(text)[0])
	
	clean_text = chkr.get_text()
	
	return clean_text
开发者ID:fmaglia,项目名称:SA_cleaners,代码行数:25,代码来源:dictionary.py

示例7: zip

# 需要导入模块: from enchant.checker import SpellChecker [as 别名]
# 或者: from enchant.checker.SpellChecker import get_text [as 别名]
# Spell-check every comment and record only the comments that produced at
# least one misspelling, together with the error words and enchant's
# suggestions.  Relies on names defined earlier in this script: `chkr`,
# `commentID`, `comments`, and the accumulator lists.
zipped_1 = zip(commentID, comments)

for Id, text in zipped_1:
    
    chkr.set_text(text)
    
    for error in chkr:
        
        # Collect this comment's misspellings and their suggestions.
        ERROR_WORDS.append(str(error.word))
        SUGGEST_WORDS.append(str(error.suggest()))
        print(str(error.word))

    # Record the comment only if it produced at least one misspelling.
    if ERROR_WORDS:

        POSTCOMMENTID.append(str(Id))
        # No replacements were made above, so get_text() returns the
        # checked text unchanged.
        COMMENTS.append(str(chkr.get_text()))
        ERROR_WORDS_LIST.append(ERROR_WORDS)
        SUGGEST_WORDS_LIST.append(SUGGEST_WORDS)
        # Rebind (not clear) the per-comment accumulators so the stored
        # lists are not mutated on the next iteration.
        ERROR_WORDS = []
        SUGGEST_WORDS = []


def find_suggested_words_repeated_letters(word):

    repeated_letters = []
    for i in list(''.join(s) for _, s in itertools.groupby(word)):
        if len(i) >= 2:
            repeated_letters.append(str(i))

    main_list = []
    combination_list = []
开发者ID:Vahidsj,项目名称:first-repo,代码行数:33,代码来源:SpellingCorrector-Level1.py

示例8: open

# 需要导入模块: from enchant.checker import SpellChecker [as 别名]
# 或者: from enchant.checker.SpellChecker import get_text [as 别名]
from enchant.checker import SpellChecker
import json
import pandas as pd
import matplotlib.pyplot as plt
"""Reading the file"""
tweets_data_path = 'namo.json'

tweets_data = []
tweets_file = open(tweets_data_path, "r")
for line in tweets_file:
    try:
        tweet = json.loads(line)
        tweets_data.append(tweet)
    except:
        continue

"""Tweet data collection from json"""
tweets = pd.DataFrame()
tweets['text'] = [tweet.get('text','') for tweet in tweets_data]
for i in tweets['text']:
    text = i
    chkr = SpellChecker("en_US", text)
    for err in chkr:
        print(err.word + " at position " + str(err.wordpos))  #<----
        err.replace("SPAM")
    print("Text replaced by spam at  wrong spelling or words")
    t = chkr.get_text()
    print("\n" + t)  #<----
开发者ID:NeelimaDatla,项目名称:Big_Data_Project,代码行数:30,代码来源:spellcheck.py

示例9: wxSpellCheckerDialog

# 需要导入模块: from enchant.checker import SpellChecker [as 别名]
# 或者: from enchant.checker.SpellChecker import get_text [as 别名]

# Minimal wx example: run enchant's stock spell-checker dialog over a
# hard-coded sample sentence.  Python 2 only: uses `print` statements and
# the long-deprecated wx.PySimpleApp.
import wx

from enchant.checker import SpellChecker
from enchant.checker.wxSpellCheckerDialog import wxSpellCheckerDialog

# Retrieve the text to be checked
text = "this is some smple text with a few erors in it"
print "[INITIAL TEXT:]", text

# Need to have an App before any windows will be shown
app = wx.PySimpleApp()

# Construct the dialog, and the SpellChecker it is to use
dlg = wxSpellCheckerDialog(None)
chkr = SpellChecker("en_US",text)
dlg.SetSpellChecker(chkr)

# Display the dialog, allowing user interaction; ShowModal blocks until
# the user finishes or cancels.
if dlg.ShowModal() == wx.ID_OK:
    # Checking completed successfully
    # Retrieve the modified text (corrections were applied via the dialog)
    print "[FINAL TEXT:]", chkr.get_text()
else:
    # Checking was cancelled
    print "[CHECKING CANCELLED]"
    
    


开发者ID:Jhanani,项目名称:IIS-Healthpoint,代码行数:28,代码来源:wx_example.py

示例10: checker

# 需要导入模块: from enchant.checker import SpellChecker [as 别名]
# 或者: from enchant.checker.SpellChecker import get_text [as 别名]
def checker(f, outf, thefilename):
    if str(f).find(".txt") == -1: return # We are not looking at a proper file
    # print("Reading: " + str(f))
    text = f.read() 

    subverbagg_err = 0
    spellerrors = 0
    #verb counting vars for entire doc
    doc_vps_average = 0.0
    nmv_error = 0
    mvt_error = 0
    vps_average = 0.0

    my_spell_checker = MySpellChecker(max_dist=1)
    chkr = SpellChecker("en_US", text)
    for err in chkr:
        # print(err.word + " at position " + str(err.wordpos))
        err.replace(my_spell_checker.replace(err.word))
        spellerrors = spellerrors + 1;        

    t = chkr.get_text()
    # print("\n" + t)

    ## Useless code?
    #print("Trying to print semantics")
    #test_results = nltk.sem.util.interpret_sents(t, 'grammars/large_grammars/commandtalk.cfg')
    #for result in test_results:
    #    for (synrep, semrep) in result:
    #        print(synrep)

    ## This divides it into a per-sentence item list.
    sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
    sentencearray = sent_detector.tokenize(t.strip())
    sentencecount = len(sentencearray)
    # print('\n-----\n'.join(sentencearray))

    ## This splits everything up into seperate words.
    tokenized = TreebankWordTokenizer().tokenize(t)
    #print(tokenized)

    ## Display spell errors found.
    # print("Spellerrors: " + str(spellerrors))
    # print("Words: " + str(len(tokenized)))
    # print("Sentences: " + str(len(sentencearray)))
    prevsentence = 0

    #stanford
    frag_count = 0
    iw_count = 0
    stanford_sentences_tree = stanford_parser.raw_parse_sents(sentencearray)
    for stanford_sentences in stanford_sentences_tree:
        for stanford_sentence in stanford_sentences:
            frag_match = re.search("FRAG", str(stanford_sentence))
            iw_match = re.findall(important_words, str(stanford_sentence))
            iw_count = iw_count + len(iw_match)
            #print(str(stanford_sentence))
            #print('-----------------------------------')
            if frag_match:
                frag_count = frag_count + 1
    
    for sentence in sentencearray:
        tokenized_sentence = TreebankWordTokenizer().tokenize(sentence)
        numwords = len(tokenized_sentence)
        pos_tagged_sentence = nltk.pos_tag(tokenized_sentence)
        # print(pos_tagged_sentence)
        
        if( prevsentence == 0 ):
            prevsentence = pos_tagged_sentence

        ## we make use of 'prevsentence' to work with section 2a, as we expect these two to be 'connected.
        worksentence = prevsentence + pos_tagged_sentence

        # 2a
        # 1. First person singular pronouns and possessive adjectives /I, me, my, mine/ refer to the
        # speaker / writer, are solved based on who the speaker is, and are not ambiguous. Same for
        # First person plural pronouns, we, our, although they are harder to interpret since they refer
        # to a "group" that includes the speaker. Second person pronouns (you, your) can be used
        # as well, in an impersonal sense { as in the following example from the excerpt of the "high"
        # essay included in the first part of project: ... going to the places you choose to go to and
        # discovering everything on your own.
        ###  Aka, we ignore: I, me my, mine, you, your.

        # 2. Third person singular pronouns are hardly used in these essays. Doublecheck if they do. If
        # you find a he or she you can quickly assess whether it is used properly: any third person
        # pronoun should have a possible antecedent. If she is used and no feminine entity has been
        # introduced, then she is wrong (see below a note on where to find the information about gender
        # and number); likewise for he and male antecedents.
        ### Female antecedents: mother, aunt, sister, niece
        ### Male antecedents: father, uncle, brother, nephew
        wronggenderantecedent = 0
        for x in range(0,len(worksentence)):
            teststring = ""
            if( worksentence[x][0] == "he"):
                wronggenderantecedent = wronggenderantecedent + 1
                for walker in range(0,x):
                  teststring += worksentence[walker][0]
                for each in maleantecedents:
                  if( teststring.find(each) != -1):
                      wronggenderantecedent = wronggenderantecedent - 1
                      break
#.........这里部分代码省略.........
开发者ID:HarryCordewener,项目名称:NLP_Walter_Harry,代码行数:103,代码来源:run.py


注:本文中的enchant.checker.SpellChecker.get_text方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。