當前位置: 首頁>>代碼示例>>Python>>正文


Python Filter.strip方法代碼示例

本文整理匯總了Python中Filter.Filter.strip方法的典型用法代碼示例。如果您正苦於以下問題:Python Filter.strip方法的具體用法?Python Filter.strip怎麼用?Python Filter.strip使用的例子?那麼, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在Filter.Filter的用法示例。


在下文中一共展示了Filter.strip方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: POS

# 需要導入模塊: from Filter import Filter [as 別名]
# 或者: from Filter.Filter import strip [as 別名]
class POS(object):
    '''
    Part-of-speech tagging helper built on the NLTK taggers.

    The trained tagger is cached at ``./Corpus/Brown-Uni.pkl`` so that later
    constructions can skip training.
    '''

    def __init__(self):
        '''
        Create the text filter and load (or train and cache) the tagger.

        If the cached tagger file cannot be opened, a backoff chain of NLTK
        taggers is trained on the Brown corpus and written back to the cache.
        '''
        self.FF = Filter()

        try:
            # Context manager guarantees the handle is closed even if
            # loading fails part-way through.
            # NOTE(review): `load` is presumably pickle's; unpickling runs
            # arbitrary code, so the cache file must come from a trusted source.
            with open("./Corpus/Brown-Uni.pkl", 'rb') as cached:
                self.unigram_tagger = load(cached)
        except IOError:
            # No readable cache: train from the Brown corpus instead.
            self.brown_tagged_sents = nltk.corpus.brown.tagged_sents(simplify_tags=True)
            # Backoff chain: final unigram -> bigram -> unigram -> default 'NN'.
            t0 = nltk.DefaultTagger('NN')
            t1 = nltk.UnigramTagger(self.brown_tagged_sents, backoff=t0)
            t2 = nltk.BigramTagger(self.brown_tagged_sents, backoff=t1)
            self.unigram_tagger = nltk.UnigramTagger(self.brown_tagged_sents, backoff=t2)

            # Persist the trained tagger for the next run.
            with open("./Corpus/Brown-Uni.pkl", 'wb') as output:
                dump(self.unigram_tagger, output, -1)

    def POSTag(self, text, s='false'):
        '''
        POS-tag the given text.

        @param text: input to tag; its expected shape depends on ``s``
        @param s: input mode. ``'false'``: ``text`` is a raw string which is
                  split into sentences, filtered and tokenised before tagging.
                  ``'tok'``: ``text`` is an iterable of token sequences, each
                  tagged separately. Any other value: ``text`` is passed to
                  the tagger as a single token sequence.

        @return: a list of tagged sentences for ``'false'`` and ``'tok'``,
                 otherwise the tagger's result for the single sequence
        '''
        if s == 'false':
            sentences = nltk.sent_tokenize(text)
            tokenised = [nltk.word_tokenize(self.FF.strip(sent)) for sent in sentences]
            return [self.unigram_tagger.tag(sent) for sent in tokenised]

        if s == 'tok':
            return [self.unigram_tagger.tag(sent) for sent in text]

        return self.unigram_tagger.tag(text)

    def POSNgram(self, text, s='false', n=3):
        '''
        Build n-grams over POS-tagged sentences.

        @param text: raw text when ``s`` is ``'false'``; otherwise an iterable
                     of sentences that are turned into n-grams as they are
        @param s: ``'false'`` to tag ``text`` with POSTag first
        @param n: length of each n-gram

        @return: list with one ``nltk.ngrams`` result per sentence
        '''
        if s == 'false':
            sentences = self.POSTag(text)
        else:
            sentences = text

        return [nltk.ngrams(sent, n) for sent in sentences]
開發者ID:danjamker,項目名稱:N-Fly,代碼行數:68,代碼來源:POS.py

示例2: NGram

# 需要導入模塊: from Filter import Filter [as 別名]
# 或者: from Filter.Filter import strip [as 別名]
class NGram(object):
    '''
    Helpers for building n-grams from tokenised or raw text.
    '''

    def __init__(self):
        '''
        Constructor; creates the Filter used to clean raw sentences.
        '''
        self.F = Filter()

    def Grams(self, pos, n=3, boundy=1):
        '''
        Collect the frequent n-grams of every order from 2 up to ``n - 1``.

        @param pos: iterable of sentences, passed to Gram for each order
        @param n: exclusive upper bound on the n-gram order
        @param boundy: frequency threshold forwarded to Gram

        @return: one flat list holding the results for order 2, then 3, ...
        '''
        collected = []
        for order in range(2, n):
            # Forward boundy so the caller's threshold is actually honoured.
            collected.extend(self.Gram(pos, n=order, boundy=boundy))

        return collected

    def Gram(self, text, n=3, boundy=1):
        '''
        @param text: iterable of sentences to be turned into n-grams
        @param n: Length of the n-grams
        @param boundy: an n-gram is kept only if it occurs more than
                       ``boundy`` times across all sentences

        @return: List of the n-grams of text that pass the threshold
        '''
        all_grams = []
        for sent in text:
            # extend() accepts lists and iterators alike, so this works
            # whether nltk.ngrams returns a list or a generator.
            all_grams.extend(nltk.ngrams(sent, n))

        freq = nltk.FreqDist(all_grams)

        return [gram for gram in freq.keys() if int(freq[gram]) > boundy]

    def NGramUn(self, text, n=3):
        '''
        Build n-grams from raw text without any frequency filtering.

        @param text: raw string; split into sentences, filtered and tokenised
        @param n: Length of the n-grams

        @return: list with one ``nltk.ngrams`` result per sentence
        '''
        sentences = nltk.sent_tokenize(text)
        tokenised = [nltk.word_tokenize(self.F.strip(sent)) for sent in sentences]

        return [nltk.ngrams(sent, n) for sent in tokenised]

    def capitalList(self, text):
        '''
        Find runs of consecutive items whose word starts with a capital.

        @param text: iterable of sentences; each sentence is an indexable
                     sequence of items whose first element is the word
                     (presumably (word, tag) pairs from the tagger)
        @return: List of runs (lists of items) of at least two capitalised
                 words. A run is only reported when it starts at an even
                 index and is followed by a non-capitalised word; a run that
                 reaches the end of the sentence is not reported.
        '''
        found = []

        for sent in text:
            for index, word in enumerate(sent):
                starts_upper = word[0][0].isupper()
                # NOTE(review): the original test was written as
                #   (isupper() & count == 0) | (islower() & count > 0)
                # Because `&` binds tighter than `==` and `>`, it evaluated as
                # "not capitalised, or index is even". A non-capitalised start
                # yields nothing, so only capitalised words at even indexes
                # matter. That behaviour is kept here; confirm the intent
                # before changing it.
                if not (starts_upper and index % 2 == 0):
                    continue

                run = []
                for x in range(index, len(sent)):
                    if not sent[x][0][0].isupper():
                        # Run ended: keep it only if it spans 2+ words.
                        if len(run) >= 2:
                            found.append(run)
                        break
                    run.append(sent[x])

        return found
開發者ID:danjamker,項目名稱:N-Fly,代碼行數:83,代碼來源:NGram.py


注:本文中的Filter.Filter.strip方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。