本文整理汇总了Python中Filter.Filter.strip方法的典型用法代码示例。如果您正苦于以下问题:Python Filter.strip方法的具体用法?Python Filter.strip怎么用?Python Filter.strip使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Filter.Filter
的用法示例。
在下文中一共展示了Filter.strip方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: POS
# Required import: from Filter import Filter [as alias]
# Or: from Filter.Filter import strip [as alias]
class POS(object):
    '''
    Part-of-speech tagger built on NLTK's Brown-corpus taggers.

    A backoff chain (default 'NN' -> unigram -> bigram -> unigram) is
    trained once and cached to disk as a pickle so later runs start fast.
    '''

    def __init__(self):
        '''
        Create the text filter and load (or train and cache) the tagger.

        Tries to unpickle a previously trained tagger from
        ./Corpus/Brown-Uni.pkl; if that fails, retrains from the Brown
        corpus and writes the pickle for the next run.
        '''
        self.FF = Filter()
        try:
            # Reuse the cached tagger if a previous run saved one.
            # `with` guarantees the handle is closed even on a bad pickle;
            # the handle is no longer named `input`, which shadowed the builtin.
            with open("./Corpus/Brown-Uni.pkl", 'rb') as pkl_file:
                self.unigram_tagger = load(pkl_file)
        except IOError:
            # No cache yet: train the backoff chain from scratch.
            self.brown_tagged_sents = nltk.corpus.brown.tagged_sents(simplify_tags=True)
            t0 = nltk.DefaultTagger('NN')
            t1 = nltk.UnigramTagger(self.brown_tagged_sents, backoff=t0)
            t2 = nltk.BigramTagger(self.brown_tagged_sents, backoff=t1)
            self.unigram_tagger = nltk.UnigramTagger(self.brown_tagged_sents, backoff=t2)
            # Cache the trained tagger; -1 selects the highest pickle protocol.
            with open("./Corpus/Brown-Uni.pkl", 'wb') as pkl_file:
                dump(self.unigram_tagger, pkl_file, -1)

    def POSTag(self, text, s='false'):
        '''
        POS-tag the tokenized text.

        @param text: raw string (s='false'), list of token lists
                     (s='tok'), or a single token list (any other s)
        @param s: input-form selector: 'false' = raw text, 'tok' =
                  pre-tokenized sentences, anything else = one sentence
        @return: POS-tagged version of the input
        '''
        if s == 'false':
            # Raw text: sentence-split, strip/tokenize, then tag.
            sentences = nltk.sent_tokenize(text)
            sentences = [nltk.word_tokenize(self.FF.strip(sent)) for sent in sentences]
            return [self.unigram_tagger.tag(sent) for sent in sentences]
        elif s == 'tok':
            # Already tokenized: tag each sentence directly.
            return [self.unigram_tagger.tag(sent) for sent in text]
        # A single token list.
        return self.unigram_tagger.tag(text)

    def POSNgram(self, text, s='false', n=3):
        '''
        Build n-grams over POS-tagged sentences.

        @param text: raw text (s='false') or already-tagged sentences
        @param s: 'false' to tag `text` first, anything else if it is
                  already tagged
        @param n: length of the n-grams
        @return: list of n-gram sequences, one per sentence
        '''
        if s == 'false':
            tagged = self.POSTag(text)
        else:
            tagged = text
        return [nltk.ngrams(sent, n) for sent in tagged]
示例2: NGram
# Required import: from Filter import Filter [as alias]
# Or: from Filter.Filter import strip [as alias]
class NGram(object):
    '''
    N-gram extraction helpers built on NLTK tokenization.
    '''

    def __init__(self):
        '''Create the text filter used before tokenization.'''
        self.F = Filter()

    def Grams(self, pos, n=3, boundy=1):
        '''
        Collect frequent k-grams for every k in 2..n-1.

        @param pos: tokenized sentences to draw grams from
        @param n: exclusive upper bound on gram length
        @param boundy: minimum frequency (exclusive) a gram must exceed
        @return: flat list of all qualifying grams
        '''
        collected = []
        for k in range(2, n):
            # BUGFIX: `boundy` was previously dropped, so Gram always used
            # its default threshold of 1; now it is passed through.
            collected.extend(self.Gram(pos, n=k, boundy=boundy))
        return collected

    def Gram(self, text, n=3, boundy=1):
        '''
        Return the n-grams of `text` occurring more than `boundy` times.

        @param text: list of tokenized sentences
        @param n: length of the n-grams
        @param boundy: frequency threshold (exclusive)
        @return: list of n-grams with frequency > boundy
        '''
        all_grams = []
        for sent in text:
            # extend() is linear (the old `t = t + s` was quadratic) and
            # also accepts the generator nltk.ngrams returns on newer NLTK,
            # where list concatenation would raise TypeError.
            all_grams.extend(nltk.ngrams(sent, n))
        freq = nltk.FreqDist(all_grams)
        return [gram for gram in freq.keys() if int(freq[gram]) > boundy]

    def NGramUn(self, text, n=3):
        '''
        Tokenize raw text and return its n-grams per sentence.

        @param text: raw input string
        @param n: length of the n-grams
        @return: list of n-gram sequences, one per sentence
        '''
        sentences = nltk.sent_tokenize(text)
        tokens = [nltk.word_tokenize(self.F.strip(sent)) for sent in sentences]
        return [nltk.ngrams(sent, n) for sent in tokens]

    def capitalList(self, text):
        '''
        Find runs of capitalized words (likely proper-noun phrases).

        @param text: tagged sentences; each word is a (token, tag) tuple
        @return: list of runs (length >= 2) of consecutive words whose
                 first letter is upper-case
        '''
        runs = []
        for sent in text:
            for count, word in enumerate(sent):
                # BUGFIX: the original used bitwise &/| which, by operator
                # precedence, evaluated e.g. (isupper() & count) == 0
                # instead of the intended (isupper() and count == 0).
                if (word[0][0].isupper() and count == 0) or \
                        (word[0][0].islower() and count > 0):
                    run = []
                    for x in range(count, len(sent)):
                        if sent[x][0][0].isupper():
                            run.append(sent[x])
                        else:
                            # Run ended; keep it only if at least 2 words.
                            if len(run) >= 2:
                                runs.append(run)
                            break
                    # NOTE(review): as in the original, a run that extends
                    # to the very end of the sentence is discarded.
        return runs