当前位置: 首页>>代码示例>>Python>>正文


Python Dictionary.bestEnglishWordForSpanishWordToken方法代码示例

本文整理汇总了Python中Dictionary.Dictionary.bestEnglishWordForSpanishWordToken方法的典型用法代码示例。如果您正苦于以下问题:Python Dictionary.bestEnglishWordForSpanishWordToken方法的具体用法?Python Dictionary.bestEnglishWordForSpanishWordToken怎么用?Python Dictionary.bestEnglishWordForSpanishWordToken使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Dictionary.Dictionary的用法示例。


在下文中一共展示了Dictionary.bestEnglishWordForSpanishWordToken方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# Required import: from Dictionary import Dictionary [as alias]
# Or: from Dictionary.Dictionary import bestEnglishWordForSpanishWordToken [as alias]
class ModifiedTranslator:
    def __init__(self):
        """Create a translator holding its own Dictionary instance."""
        # translateSentence queries this object through
        # bestEnglishWordForSpanishWordToken for every token.
        self.dictionary = Dictionary()

    # Pre processing strategies
    def createWordLookup(self, foreignSentence):
        """Tokenize a Spanish sentence and record per-token metadata.

        The sentence is split on runs of non-word characters. The pattern
        uses a capturing group, so the separators (spaces, punctuation) are
        kept as tokens of their own and the sentence can be rebuilt by
        concatenating the tokens in order.

        The result is stored in ``self.tokenDictList``: a list with one dict
        per token, each holding

        * ``originalToken`` -- the token text,
        * ``spanish_POS``   -- the entry ``Corpus().spanishTags()`` has for
          the token, or ``None`` when there is none,
        * ``upper``         -- ``True`` when the token starts with an
          uppercase character, ``False`` otherwise (including empty tokens).

        Args:
            foreignSentence: the sentence to tokenize, either a UTF-8 encoded
                byte string or already-decoded text.
        """
        corpus = Corpus()
        # Fetch the tag table once instead of once per token.
        spanishTags = corpus.spanishTags()

        # Decode byte strings only; already-decoded text is used as-is
        # (decoding it a second time would raise a TypeError).
        if isinstance(foreignSentence, bytes):
            foreignSentence = foreignSentence.decode('utf-8')

        spanishTokens = re.compile(r'(\W+)', re.UNICODE).split(foreignSentence)

        # When the sentence ends with a separator, split() leaves an empty
        # string behind it; drop only that. Popping unconditionally would
        # discard the last word of a sentence without final punctuation.
        if spanishTokens and not spanishTokens[-1]:
            spanishTokens.pop()

        tokenDictList = []
        for token in spanishTokens:
            tokenDictList.append({
                'originalToken': token,
                'spanish_POS': spanishTags.get(token, None),
                'upper': bool(token) and token[0].isupper(),
            })

        self.tokenDictList = tokenDictList
            
    def translateSentence(self, foreignSentence):
        """Translate a Spanish sentence into English token by token.

        The sentence is tokenized with ``createWordLookup``, every token is
        looked up in ``self.dictionary``, the post-processing strategies are
        applied to ``self.tokenDictList`` in order, and the translated tokens
        are concatenated.

        Args:
            foreignSentence: the Spanish sentence to translate.

        Returns:
            The translated sentence as a single string.
        """
        # Pre processing
        self.createWordLookup(foreignSentence)

        # Translation: keep the original token whenever the dictionary
        # returns nothing truthy for it.
        for spanishToken in self.tokenDictList:
            originalToken = spanishToken['originalToken']
            translatedWord = self.dictionary.bestEnglishWordForSpanishWordToken(spanishToken)
            spanishToken['translatedToken'] = translatedWord or originalToken

        # Post processing
        # Strategies
        # 1 - Preserve capitalization
        self.capitalizeWords()

        # 2 - Flip object pronouns (gave him -> le dio vs dio le)
        self.flipObjectPronouns()

        # 3 - Flip adjectives and nouns (whole book -> libro entero vs entero libro)
        self.flipAdjectivesAndNouns()

        # 4 - For infinitive, future, and conditional verbs, insert 'to',
        #     'will', and 'would' respectively
        self.correctVerbForm()

        # Separators are tokens too, so joining without a delimiter rebuilds
        # the sentence; join avoids re-copying the string for every token.
        return "".join(token['translatedToken'] for token in self.tokenDictList)
    
    # Post processing strategies
    def capitalizeWords(self):
        for token in self.tokenDictList:
            if token['upper']:    
                translatedToken = token['translatedToken']
                shouldCapitalize = True
                if (len(translatedToken) > 1):
                    if translatedToken[1].isupper():
                        shouldCapitalize = False
                        
                if shouldCapitalize:
                    token['translatedToken'] = translatedToken.capitalize()
                    
    def flipObjectPronouns(self):
        objectPronoun = None
        verb = None
        
        # find an object pronoun
        for idx, token in enumerate(self.tokenDictList):
            spanishPOS = token['spanish_POS']
            if spanishPOS:
                if (spanishPOS[0] == 'p') and (spanishPOS[1] == 'p'):
                    possibleVerbTokenIndex = idx + 2
                    if (len(self.tokenDictList) > possibleVerbTokenIndex):
                        possibleVerbToken = self.tokenDictList[possibleVerbTokenIndex]
                        possibleVerbTag = possibleVerbToken['spanish_POS']
                        if (possibleVerbTag and (possibleVerbTag[0] == 'v')):
                            objectPronoun = token
                            verb = possibleVerbToken
            
        if (objectPronoun and verb):
            idx1, idx2 = self.tokenDictList.index(objectPronoun), self.tokenDictList.index(verb)
            self.tokenDictList[idx2], self.tokenDictList[idx1] = self.tokenDictList[idx1], self.tokenDictList[idx2]
    
    def flipAdjectivesAndNouns(self):
        noun = None
#.........这里部分代码省略.........
开发者ID:danielsht86,项目名称:cs124-pa6,代码行数:103,代码来源:ModifiedTranslator.py


注:本文中的Dictionary.Dictionary.bestEnglishWordForSpanishWordToken方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。