当前位置: 首页>>代码示例>>Python>>正文


Python Filter.strip方法代码示例

本文整理汇总了Python中Filter.Filter.strip方法的典型用法代码示例。如果您正苦于以下问题:Python Filter.strip方法的具体用法?Python Filter.strip怎么用?Python Filter.strip使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Filter.Filter的用法示例。


在下文中一共展示了Filter.strip方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: POS

# 需要导入模块: from Filter import Filter [as 别名]
# 或者: from Filter.Filter import strip [as 别名]
class POS(object):
    '''
    Part-of-speech tagging built on the NLTK taggers.

    The tagger is trained on the Brown corpus and cached as a pickle file so
    that later instantiations can load it instead of training again.
    '''

    # Pickle cache of the trained tagger, relative to the working directory.
    _TAGGER_CACHE = "./Corpus/Brown-Uni.pkl"

    def __init__(self):
        '''
        Create the text filter and obtain the tagger.

        The tagger is loaded from the pickle cache. If that raises IOError
        (for example because the file does not exist yet), the tagger is
        trained from the Brown corpus and written to the cache.
        '''
        self.FF = Filter()

        try:
            # NOTE: unpickling can execute arbitrary code, so the cache file
            # must only ever come from a trusted location.
            # The context manager closes the handle even if load() fails.
            with open(self._TAGGER_CACHE, 'rb') as cache:
                self.unigram_tagger = load(cache)
        except IOError:
            # NOTE(review): `simplify_tags` is accepted only by older NLTK
            # releases - confirm the NLTK version this project targets.
            self.brown_tagged_sents = nltk.corpus.brown.tagged_sents(simplify_tags=True)

            # Back-off chain: unigram -> bigram -> unigram -> default 'NN'.
            default_tagger = nltk.DefaultTagger('NN')
            inner_unigram = nltk.UnigramTagger(self.brown_tagged_sents,
                                               backoff=default_tagger)
            bigram = nltk.BigramTagger(self.brown_tagged_sents,
                                       backoff=inner_unigram)
            self.unigram_tagger = nltk.UnigramTagger(self.brown_tagged_sents,
                                                     backoff=bigram)

            # -1 selects the highest pickle protocol available.
            with open(self._TAGGER_CACHE, 'wb') as cache:
                dump(self.unigram_tagger, cache, -1)

    def POSTag(self, text, s='false'):
        '''
        POS tag the given input.

        @param text: input to tag; its expected form depends on s
        @param s: 'false' - text is a raw string; it is split into sentences,
                  each sentence is passed through the filter's strip() and
                  word-tokenized before tagging.
                  'tok' - text is an iterable of token sequences; each one is
                  tagged separately.
                  any other value - text is a single token sequence.

        @return: a list of tagged sentences for 'false' and 'tok', otherwise
                 the tagger output for the single token sequence
        '''
        if s == 'false':
            tokenized = [nltk.word_tokenize(self.FF.strip(sent))
                         for sent in nltk.sent_tokenize(text)]
            return [self.unigram_tagger.tag(tokens) for tokens in tokenized]

        if s == 'tok':
            return [self.unigram_tagger.tag(tokens) for tokens in text]

        return self.unigram_tagger.tag(text)

    def POSNgram(self, text, s='false', n=3):
        '''
        Build n-grams over POS tagged sentences.

        @param text: raw text when s is 'false', otherwise an iterable of
                     sentences that are turned into n-grams as they are
        @param s: 'false' to POS tag text first; any other value to skip
                  tagging
        @param n: length of the n-grams

        @return: list with one nltk.ngrams() result per sentence
        '''
        sentences = self.POSTag(text) if s == 'false' else text
        return [nltk.ngrams(sent, n) for sent in sentences]
开发者ID:danjamker,项目名称:N-Fly,代码行数:68,代码来源:POS.py

示例2: NGram

# 需要导入模块: from Filter import Filter [as 别名]
# 或者: from Filter.Filter import strip [as 别名]
class NGram(object):
    '''
    Helpers for building n-grams and capitalized-word runs from sentences.
    '''

    def __init__(self):
        '''
        Create the text filter used by NGramUn.
        '''
        self.F = Filter()

    def Grams(self, pos, n=3, boundy=1):
        '''
        Collect the frequent n-grams of every length from 2 up to n - 1.

        @param pos: iterable of sentences, each a sequence of items
        @param n: exclusive upper bound on the n-gram length
        @param boundy: an n-gram is kept only when it occurs more than this
                       many times

        @return: flat list of n-grams, shorter lengths first
        '''
        collected = []
        for length in range(2, n):
            # boundy is forwarded so that the caller's threshold is honoured.
            collected.extend(self.Gram(pos, n=length, boundy=boundy))

        return collected

    def Gram(self, text, n=3, boundy=1):
        '''
        @param text: iterable of sentences to be turned into n-grams
        @param n: length of the n-grams
        @param boundy: an n-gram is kept only when it occurs more than this
                       many times

        @return: list of the n-grams of text that pass the threshold
        '''
        grams = []
        for sent in text:
            # extend() accepts both a list and an iterator, so this works
            # whichever of the two nltk.ngrams returns.
            grams.extend(nltk.ngrams(sent, n))

        freq = nltk.FreqDist(grams)

        return [gram for gram in freq.keys() if int(freq[gram]) > boundy]

    def NGramUn(self, text, n=3):
        '''
        Build n-grams from raw, untagged text.

        @param text: raw string; it is split into sentences, each sentence is
                     passed through the filter's strip() and word-tokenized
        @param n: length of the n-grams

        @return: list with one nltk.ngrams() result per sentence
        '''
        tokenized = [nltk.word_tokenize(self.F.strip(sent))
                     for sent in nltk.sent_tokenize(text)]

        return [nltk.ngrams(tokens, n) for tokens in tokenized]

    def capitalList(self, text):
        '''
        @param text: iterable of tagged sentences, each a sequence of
                     (word, tag) pairs
        @return: List of runs of two or more consecutive tagged words whose
                 first letters are all capitalized
        '''
        runs = []

        for sent in text:
            for index, word in enumerate(sent):
                first_char = word[0][0]

                # Logical and/or instead of the bitwise &/|: the bitwise
                # operators bind tighter than == and >, so the old test
                # compared (isupper & index) with 0 and depended on whether
                # the index was odd or even.
                starts_scan = ((first_char.isupper() and index == 0)
                               or (first_char.islower() and index > 0))
                if not starts_scan:
                    continue

                # NOTE(review): a scan started by the lower-case clause ends
                # on its first word, so only sentence-initial runs are ever
                # returned - confirm this is the intended trigger.
                run = []
                for tagged in sent[index:]:
                    if tagged[0][0].isupper():
                        run.append(tagged)
                        continue

                    # The first non-capitalized word closes the run.
                    if len(run) >= 2:
                        runs.append(run)
                    break
                # NOTE(review): a run that reaches the end of the sentence is
                # not recorded, as before - confirm whether it should be.

        return runs
开发者ID:danjamker,项目名称:N-Fly,代码行数:83,代码来源:NGram.py


注:本文中的Filter.Filter.strip方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。