当前位置: 首页>>代码示例>>Python>>正文


Python utf8.get_letters函数代码示例

本文整理汇总了Python中tamil.utf8.get_letters函数的典型用法代码示例。如果您正苦于以下问题:Python get_letters函数的具体用法?Python get_letters怎么用?Python get_letters使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了get_letters函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: removeSuffix

 def removeSuffix(self, word):
     """Strip the longest matching known suffix from ``word``.

     The word is split into letters with ``utf8.get_letters`` and compared,
     in reversed form, against ``self.reversed_suffixes``.  When a suffix
     matches, its letters are removed from the end of the word and, if
     ``self.replace_suffixes`` has a truthy entry for the removed suffix,
     that replacement is appended to the remaining stem.

     Returns a ``(word, removed)`` tuple; ``removed`` is True only when a
     non-empty suffix was stripped.
     """
     if not self.possible_suffixes:
         # Suffix tables are built lazily on first use.
         self.setSuffixes()
         self.prepareSuffixes()

     letters = utf8.get_letters(word)
     reversed_word = u"".join(reversed(letters))

     best = ""
     for candidate in self.reversed_suffixes:
         # ">=" keeps the later candidate when two matches have equal length.
         if reversed_word.startswith(candidate) and len(candidate) >= len(best):
             best = candidate

     if not best:
         return word, False

     # Pop as many letters off the end as the matched suffix contains.
     suffix_size = len(utf8.get_letters(best))
     stripped = [letters.pop() for _ in range(suffix_size)]
     stripped.reverse()
     suffix = u"".join(stripped)
     stem = u"".join(letters)

     # Rule to replace the removed suffix with an alternate ending.
     replacement = self.replace_suffixes.get(suffix, None)
     if replacement:
         stem = stem + replacement
     return stem, True
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:33,代码来源:morphology.py

示例2: test_reverse_words

 def test_reverse_words(self):
     """Unit test for reversing a Tamil string.

     For every whitespace-separated word of a mixed Tamil/ASCII sample, the
     first letter of ``utf8.reverse_word(word)`` must equal the last letter
     of the original word.
     """
     print(utf8.get_letters(u"இந்த"))
     print(u"".join(utf8.get_letters(u"இந்த")))
     for word in u"இந்த (C) tamil முத்தையா அண்ணாமலை 2013 இந்த ஒரு எழில் தமிழ் நிரலாக்க மொழி உதாரணம்".split():
         rword = utf8.reverse_word(word)
         # Pre-format into one argument so the call is valid, and prints the
         # same text, on both Python 2 and Python 3.
         print(u"%s %s" % (word, rword))
         self.assertEqual(utf8.get_letters(rword)[0], utf8.get_letters(word)[-1])
开发者ID:tk120404,项目名称:open-tamil,代码行数:9,代码来源:letter_tests.py

示例3: test_istamil

 def test_istamil(self):
     """Check ``utf8.istamil`` against letters from ``utf8.get_letters``.

     Every letter of each word in a Tamil-only sentence must be reported as
     Tamil; a two-word phrase must contain at least one Tamil letter; and a
     phrase with spaces and digits must yield the exact expected flag list.
     """
     zz = u"முத்தையா அண்ணாமலை எந்த ஒரு தெரிந்த அல்லது தெரியாத எழுத்துருவாகவிருந்தாலும் அதனை மேல்தட்டில் உள்ளிட்டு கீழே உள்ள முடியும்"
     for z in zz.split(u" "):
         print("********** t/f ********")
         letters = utf8.get_letters(z)
         flags = [utf8.istamil(letter) for letter in letters]
         for letter, flag in zip(letters, flags):
             print("%s => %s" % (letter, flag))
         # Checked once per word instead of once per letter.
         assert all(flags)

     z = u"முத்தையா அண்ணாமலை"
     assert any(utf8.istamil(letter) for letter in utf8.get_letters(z))

     correct = [True, True, True, True, False, True, True, True, True, True, False, False, False, False, False]
     # list(...) is required: on Python 3 map() returns an iterator, which
     # never compares equal to a list.
     assert list(map(utf8.istamil, utf8.get_letters(u"முத்தையா அண்ணாமலை 2013"))) == correct
开发者ID:tk120404,项目名称:open-tamil,代码行数:13,代码来源:letter_tests.py

示例4: test_entity

 def test_entity(self):
     """A WordEntity exposes its word, its letters and its (row, col) position."""
     text = u"nuthin"
     entity = WordEntity(text, row=5, col=6)

     self.assertEqual(entity.word, text)
     self.assertEqual(entity.letters, utf8.get_letters(u"nuthin"))

     position = (entity.row, entity.col)
     self.assertEqual(position, (5, 6))
     self.assertTrue(entity.isWord())
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:7,代码来源:solthiruthi_dom.py

示例5: getWordCount

 def getWordCount(self, word):
     """Return the count recorded for ``word``.

     ``self.isWord`` is asked for both the membership flag and the trie
     node it ended on; the count is read from that node, keyed by the last
     letter of ``word``.

     Raises:
         Exception: if ``self.isWord`` reports that ``word`` is not present.
     """
     found, node = self.isWord(word, ret_ref_trie=True)
     if found:
         last_letter = utf8.get_letters(word)[-1]
         return node.count[last_letter]
     raise Exception(u"Word does not exist in Trie")
开发者ID:msathia,项目名称:Ezhil-Lang,代码行数:7,代码来源:datastore.py

示例6: norvig_suggestor

def norvig_suggestor(word,alphabets=None,nedits=1,limit=float("inf")):
    """Generate spelling alternates of ``word`` in the style of Norvig's corrector.

    Args:
        word: the word to edit, either a string (split into letters with
            ``get_letters``) or an already-split list of letters.
        alphabets: iterable of candidate letters used for replacements and
            insertions; defaults to ``tamil_letters`` when empty or None.
        nedits: edit distance; values above 1 are handled by recursively
            editing every alternate produced at distance ``nedits - 1``.
        limit: soft cap on the number of alternates accumulated in the
            recursive case.  It is only checked between batches, so the
            result may exceed it.

    Returns:
        A set of strings: deletions, adjacent transpositions, replacements
        and insertions at every letter position.

    All edits operate on whole letters rather than on code points of the
    joined string, so a multi-code-point letter is never cut in half.
    """
    if not alphabets:
        alphabets = tamil_letters
    if isinstance(word, list):
        wordL = word
    else:
        wordL = list(get_letters(word))

    # Recursive method for edit distance > 1.
    if nedits > 1:
        result = []
        for nAlternate in norvig_suggestor(wordL,alphabets,nedits-1,limit-len(result)):
            if len(result) > limit:
                break
            result.extend( norvig_suggestor(nAlternate,alphabets,1,limit-len(result)) )
        return set(result)

    join = u"".join
    # Split at every letter boundary: (prefix, suffix) with prefix + suffix == wordL.
    # The prefix must be wordL[:idx]; using idx-1 silently drops a letter and
    # wraps around to wordL[:-1] for idx == 0.
    ta_splits     = [ (wordL[:idx], wordL[idx:]) for idx in range(len(wordL) + 1) ]
    ta_deletes    = [ join(a + b[1:]) for a, b in ta_splits if b ]
    ta_transposes = [ join(a + [b[1], b[0]] + b[2:]) for a, b in ta_splits if len(b) > 1 ]
    ta_replaces   = [ join(a) + c + join(b[1:]) for a, b in ta_splits if b for c in alphabets ]
    # NOTE(review): kept from the original for backward compatibility; this
    # replaces the whole prefix with a single letter - confirm it is intended.
    ta_replaces2  = [ c + join(b) for a, b in ta_splits for c in alphabets ]
    ta_inserts    = [ join(a) + c + join(b) for a, b in ta_splits for c in alphabets ]
    return set(ta_deletes + ta_transposes + ta_replaces + ta_replaces2 + ta_inserts )
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:25,代码来源:suggestions.py

示例7: test_letter_extract_with_ascii

 def test_letter_extract_with_ascii(self):
     """Letter extraction on text that mixes Tamil words with ASCII words.

     Checks the total number of letters returned by ``utf8.get_letters``
     and that the fourth letter from the end is the ASCII ``a``.
     """
     letters = utf8.get_letters(u"கூவிளம் is என்பது also என்ன a சீர்")
     # One pre-formatted argument: valid, and identical in output, on both
     # Python 2 and Python 3.
     print(u"len ==== >  %d" % len(letters))
     assert len(letters) == 25
     for pos, letter in enumerate(letters):
         print(u"%d %s" % (pos, letter))
     assert letters[-4] == u"a"
开发者ID:srikanthlogic,项目名称:open-tamil,代码行数:7,代码来源:letter_tests.py

示例8: test_classifier

 def test_classifier(self):
     """``utf8.classify_letter`` labels for a mixed ASCII / digit / Tamil string."""
     expected = (
         ['english'] * 3
         + ['digit'] * 4
         + ['kuril', 'nedil', 'uyirmei', 'vallinam', 'uyirmei']
     )
     data = [
         utf8.classify_letter(letter)
         for letter in utf8.get_letters(u"abc1230அஆரெட்டை")
     ]
     self.assertEqual(data, expected)
开发者ID:msathia,项目名称:Ezhil-Lang,代码行数:7,代码来源:letter_tests2.py

示例9: test_words_to_letters

 def test_words_to_letters(self):
     """Each word of the sample sentence splits into the expected number of letters."""
     k1 = u"இந்தக் குளிர்ல டெய்லி தலைக்கு குளிக்கற நல்லவங்க இருக்கறதாலதான் கோவை இப்படி சூப்பரா இருக்காம்"
     word_length = [4,4,3,4,5,6,9,2,4,4,5]
     tokens = k1.split(' ')
     for position, token in enumerate(tokens):
         letter_count = len(get_letters(token))
         print('w# ', position, letter_count)
         self.assertEqual(word_length[position], letter_count)
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:7,代码来源:keechu.py

示例10: test_letter_extract_from_code_pts

 def test_letter_extract_from_code_pts(self):
     """Letter extraction on a Tamil-only phrase: letter count and final letter."""
     sample = u"கூவிளம் என்பது என்ன சீர்"
     letters = utf8.get_letters(sample)
     assert len(letters) == 15
     for index, current in enumerate(letters):
         print(u"%d %s" % (index, current))
     # The last extracted letter is the one the loop finished on.
     assert letters[-1] == u"ர்"
开发者ID:tk120404,项目名称:open-tamil,代码行数:7,代码来源:letter_tests.py

示例11: get

 def get(word):
     """Build a ``Feature`` describing the letters of ``word``.

     Whitespace is removed, the word is split with ``utf8.get_letters`` and
     the following attributes are filled in on a fresh ``Feature``:

     * ``nletters`` - number of letters (as a float),
     * ``unigscore`` / ``bigscore`` - ``unigram_score`` and the maximum of
       ``bigram_scores`` over the letters,
     * ``vowels`` and the per-class counters (``kurils``, ``nedils``,
       ``ayudhams``, ``vallinams``, ``mellinams``, ``idayinams``,
       ``granthams``), each divided by ``nletters``,
     * ``first`` / ``last`` - scores derived from which letter set the first
       and last letters belong to.

     An empty word is not supported: normalising by the letter count and
     indexing the first/last letter would fail.

     NOTE(review): ``vallinams`` used to be divided by ``nletters`` twice;
     any model fitted on the old values should be re-checked.
     """
     word = word.strip()
     word = word.replace(u' ',u'')
     letters = utf8.get_letters(word)
     F = Feature()
     F.nletters = len(letters)*1.0
     F.unigscore = unigram_score(letters)
     F.bigscore = max(bigram_scores(letters))
     for letter in letters:
         try:
             rtl = reverse_transliterate(letter)
             # A tuple argument avoids an inner loop variable that would
             # shadow (and, on Python 2, overwrite) the current letter.
             if rtl.startswith(('a','e','i','o','u')):
                 F.vowels += 1.0
         except Exception:
             # Best effort: a letter that cannot be transliterated simply
             # does not contribute to the vowel count.
             pass

         kind = utf8.classify_letter(letter)
         if kind == 'kuril':
             F.kurils += 1
         elif kind == 'nedil':
             F.nedils += 1
         elif kind == 'ayudham':
             F.ayudhams += 1
         elif kind == 'vallinam':
             F.vallinams += 1
         elif kind == 'mellinam':
             F.mellinams += 1
         elif kind == 'idayinam':
             F.idayinams += 1
         elif kind in ['english','digit']:
             continue
         elif kind == 'tamil_or_grantham':
             F.granthams += 1

     # Turn raw counts into fractions of the word length.
     F.kurils /= F.nletters
     F.nedils /= F.nletters
     F.ayudhams /= F.nletters
     F.vallinams /= F.nletters
     F.mellinams /= F.nletters
     F.idayinams /= F.nletters
     F.granthams /= F.nletters
     F.vowels /= F.nletters

     if letters[0] in utf8.uyir_letters:
         F.first += 1.0
     if letters[0] in utf8.mei_letters:
         F.first += F.first + 0.25
     if letters[0] in utf8.uyirmei_letters:
         F.first += F.first + 0.05

     if letters[-1] in utf8.uyir_letters:
         F.last += 1.0
     if letters[-1] in utf8.mei_letters:
         F.last += F.last + 0.25
     if letters[-1] in utf8.uyirmei_letters:
         F.last += F.last + 0.05

     return F
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:59,代码来源:preprocess.py

示例12: anagram

def anagram(request,word):
    """Return the anagrams of ``word`` as a JSON array response.

    A dictionary is created from ``TamilVU`` via ``DictionaryBuilder.create``
    and ``wordutils.anagrams`` is evaluated against it; the resulting list
    is serialised without ASCII-escaping.

    Args:
        request: the incoming request object (not inspected here).
        word: the word whose anagrams are looked up.

    Returns:
        An ``HttpResponse`` with content type
        ``application/json; charset=utf-8``.
    """
    # Only the dictionary itself is needed; the size is discarded.
    TVU, _ = DictionaryBuilder.create(TamilVU)
    actual = list(wordutils.anagrams(word,TVU))
    json_string = json.dumps(actual,ensure_ascii = False)
    # Set the content type and encoding explicitly since the payload is
    # not ASCII-escaped.
    return HttpResponse(json_string,content_type="application/json; charset=utf-8" )
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:9,代码来源:views.py

示例13: test_tamil_only_words

 def test_tamil_only_words(self):
     """``utf8.get_tamil_words`` returns the Tamil words and skips the ASCII phrase."""
     s = u"உடனே உடனே seventh heaven எழுதினால் செய்திப் பத்திரிகை போஆகிவிடும் அசோகமித்திரன் நேர்காணல்"
     outWords = utf8.get_tamil_words(utf8.get_letters(s))

     # Expected result: the same sentence with the ASCII phrase removed.
     without_ascii = s.replace(u"seventh heaven ", u"")
     words = without_ascii.split(u" ")

     if LINUX:
         print(u"|".join(words))
         print(u"|".join(outWords))
     self.assertEqual(outWords, words)
开发者ID:nomad-vino,项目名称:open-tamil,代码行数:9,代码来源:letter_tests.py

示例14: getAllWordsPrefix

 def getAllWordsPrefix(self, prefix):
     """Intended to collect every stored word that starts with ``prefix``.

     Currently disabled: the method raises unconditionally on entry, so
     the draft implementation below the ``raise`` never runs.

     Raises:
         Exception: always, with the message "NOT IMPLEMENTED RIGHT".
     """
     raise Exception("NOT IMPLEMENTED RIGHT")
     # ---- unreachable draft, kept for reference ----
     is_word, node, word_limits = self.isWord(prefix, ret_ref_trie=True)
     # The prefix itself counts when it is a complete word.
     matches = [prefix] if is_word else []
     letters = utf8.get_letters(prefix)
     self.getAllWordsHelper(node, word_limits, letters, matches)
     return matches
开发者ID:msathia,项目名称:Ezhil-Lang,代码行数:9,代码来源:datastore.py

示例15: keech

def keech(request,k1):
    """Return, as JSON, the number of letters in each space-separated word of ``k1``.

    The JSON object maps each word's position in ``k1.split(' ')`` to the
    length of ``get_letters`` for that word.  ``request`` is not inspected.
    """
    letter_counts = {
        position: len(get_letters(token))
        for position, token in enumerate(k1.split(' '))
    }
    payload = json.dumps(letter_counts, ensure_ascii=False)
    # Content type and encoding are set explicitly on the response.
    return HttpResponse(payload, content_type="application/json; charset=utf-8")
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:10,代码来源:views.py


注:本文中的tamil.utf8.get_letters函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。