本文整理匯總了Python中Filter.Filter.strip方法的典型用法代碼示例。如果您正苦於以下問題:Python Filter.strip方法的具體用法?Python Filter.strip怎麽用?Python Filter.strip使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類Filter.Filter
的用法示例。
在下文中一共展示了Filter.strip方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: POS
# 需要導入模塊: from Filter import Filter [as 別名]
# 或者: from Filter.Filter import strip [as 別名]
class POS(object):
    '''
    Class for POS tagging, using the POS taggers from NLTK.

    Loads a pickled tagger from ./Corpus/Brown-Uni.pkl when available;
    otherwise trains a DefaultTagger -> UnigramTagger -> BigramTagger
    backoff chain on the Brown corpus and caches it to that file.
    '''
    def __init__(self):
        '''
        Constructor: initialises the text filter and loads (or trains and
        caches) the tagger.
        '''
        self.FF = Filter()
        try:
            # Fast path: reuse the previously trained, pickled tagger.
            # 'with' guarantees the handle is closed even if load() raises
            # (the original leaked the file on that path).
            with open("./Corpus/Brown-Uni.pkl", 'rb') as pkl_in:
                self.unigram_tagger = load(pkl_in)
        except IOError:
            # No cached tagger yet: train the backoff chain.
            # NOTE(review): simplify_tags= was removed in NLTK 3 in favour of
            # tagset='universal' — confirm the NLTK version this targets.
            self.brown_tagged_sents = nltk.corpus.brown.tagged_sents(simplify_tags=True)
            t0 = nltk.DefaultTagger('NN')  # last-resort guess: tag as noun
            t1 = nltk.UnigramTagger(self.brown_tagged_sents, backoff=t0)
            t2 = nltk.BigramTagger(self.brown_tagged_sents, backoff=t1)
            # NOTE(review): this final UnigramTagger duplicates t1 with t2 as
            # backoff; t2 alone may have been intended — kept as-is to
            # preserve tagging behaviour and pickle contents.
            self.unigram_tagger = nltk.UnigramTagger(self.brown_tagged_sents, backoff=t2)
            with open("./Corpus/Brown-Uni.pkl", 'wb') as pkl_out:
                dump(self.unigram_tagger, pkl_out, -1)

    def POSTag(self, text, s='false'):
        '''
        POS-tag the given text.

        @param text: raw text when s == 'false'; a list of token lists when
                     s == 'tok'; a single token list otherwise
        @param s: input-mode flag (see above)
        @return: POS-tagged version of the input
        '''
        if s == 'false':
            # Raw text: sentence-split, filter, tokenise, then tag.
            sentences = nltk.sent_tokenize(text)
            sentences = [nltk.word_tokenize(self.FF.strip(sent)) for sent in sentences]
            sentences = [self.unigram_tagger.tag(sent) for sent in sentences]
        elif s == 'tok':
            # Already tokenised sentences: tag each token list directly.
            sentences = [self.unigram_tagger.tag(sent) for sent in text]
        else:
            # A single tokenised sentence.
            sentences = self.unigram_tagger.tag(text)
        return sentences

    def POSNgram(self, text, s='false', n=3):
        '''
        Build n-grams over POS-tagged input.

        @param text: raw text when s == 'false', otherwise pre-tagged sentences
        @param s: input-mode flag
        @param n: length of the n-grams
        @return: list of n-gram iterables, one per sentence
        '''
        if s == 'false':
            tagged = self.POSTag(text)
            return [nltk.ngrams(sent, n) for sent in tagged]
        return [nltk.ngrams(sent, n) for sent in text]
示例2: NGram
# 需要導入模塊: from Filter import Filter [as 別名]
# 或者: from Filter.Filter import strip [as 別名]
class NGram(object):
    '''
    N-gram extraction utilities over tokenised / POS-tagged sentences.
    '''
    def __init__(self):
        '''
        Constructor: creates the text filter used for tokenisation.
        '''
        self.F = Filter()

    def Grams(self, pos, n=3, boundy=1):
        '''
        Collect frequent grams of every length from 2 up to n-1.

        @param pos: list of tagged/tokenised sentences
        @param n: exclusive upper bound on gram length
        @param boundy: minimum occurrence count (exclusive) per gram
        @return: flat list of frequent grams of all lengths in range(2, n)
        '''
        collected = []
        for length in range(2, n):
            # Bug fix: forward the caller's boundy; the original dropped it
            # and always used Gram's default of 1.
            collected.extend(self.Gram(pos, n=length, boundy=boundy))
        return collected

    def Gram(self, text, n=3, boundy=1):
        '''
        Extract the n-grams that occur more than boundy times.

        @param text: list of tokenised sentences
        @param n: length of the n-grams
        @param boundy: minimum frequency (exclusive lower bound)
        @return: list of n-grams with frequency strictly greater than boundy
        '''
        all_grams = []
        for sent in text:
            # extend() also accepts the generator nltk.ngrams returns in
            # newer NLTK versions; the original list concatenation would
            # raise a TypeError there.
            all_grams.extend(nltk.ngrams(sent, n))
        freq = nltk.FreqDist(all_grams)
        return [g for g in freq.keys() if int(freq[g]) > boundy]

    def NGramUn(self, text, n=3):
        '''
        Sentence-split, filter and tokenise raw text, then build n-grams.

        @param text: raw input text
        @param n: length of the n-grams
        @return: list of n-gram iterables, one per sentence
        '''
        sents = nltk.sent_tokenize(text)
        sents = [nltk.word_tokenize(self.F.strip(sent)) for sent in sents]
        return [nltk.ngrams(sent, n) for sent in sents]

    def capitalList(self, text):
        '''
        Find runs (length >= 2) of consecutive capitalised tagged words.

        @param text: list of tagged sentences, each a list of (word, tag) pairs
        @return: list of capitalised-word runs, each a list of tagged words
        '''
        runs = []
        for sent in text:
            for count, word in enumerate(sent):
                # Bug fix: the original used bitwise '&'/'|'; operator
                # precedence made it evaluate (isupper() & count) == 0,
                # which fired on unrelated words. Boolean and/or is the
                # intended test: a capitalised sentence-initial word, or a
                # lower-case word later in the sentence (run boundary).
                if (word[0][0].isupper() and count == 0) or \
                   (word[0][0].islower() and count > 0):
                    run = []
                    for x in range(count, len(sent)):
                        if sent[x][0][0].isupper():
                            run.append(sent[x])
                        else:
                            # Run ended: keep it only if it has >= 2 words.
                            # NOTE(review): a run that extends to the end of
                            # the sentence is never recorded — preserved from
                            # the original; confirm whether that is intended.
                            if len(run) >= 2:
                                runs.append(run)
                            break
        return runs