本文整理汇总了Python中nltk.probability.FreqDist.copy方法的典型用法代码示例。如果您正苦于以下问题:Python FreqDist.copy方法的具体用法?Python FreqDist.copy怎么用?Python FreqDist.copy使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nltk.probability.FreqDist的用法示例。
在下文中一共展示了FreqDist.copy方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: findKeyword
# 需要导入模块: from nltk.probability import FreqDist [as 别名]
# 或者: from nltk.probability.FreqDist import copy [as 别名]
def findKeyword(fname, apply=False, eventflg=False):
    """Extract keywords from a text file, most frequent first.

    The file is read as UTF-8 (undecodable bytes are ignored), lower-cased
    and split into runs of ASCII letters and digits.  Tokens found in NLTK's
    English stopword list or in the module-level ``stopwd`` collection are
    dropped, and the remaining tokens are counted with ``FreqDist``.

    Args:
        fname: Path of the text file to analyse.
        apply: When true, return ``(pairs, text)`` instead of the bare
            keyword list, where ``pairs`` is the ``(token, count)`` list
            produced by ``FreqDist.most_common()`` and ``text`` is the
            lower-cased file content.
        eventflg: When true, additionally drop every ``(token, count)``
            entry that is contained in the module-level ``dictionary``.

    Returns:
        A list of tokens in ``most_common()`` order, or the
        ``(pairs, text)`` tuple when ``apply`` is true.
    """
    with open(fname, 'r', encoding='utf-8', errors='ignore') as file:
        text = file.read().lower()

    # Filter in a single pass.  Removing hits one by one with list.remove()
    # rescans the list for every stopword occurrence (quadratic) while
    # producing exactly this result: all occurrences gone, order preserved.
    stopw = set(stopwords.words('english'))
    tokens = [
        token
        for token in re.findall(r'[a-zA-Z0-9]+', text)
        if token not in stopw and token not in stopwd
    ]

    # (token, count) pairs as returned by most_common().
    tokens = FreqDist(tokens).most_common()

    if eventflg:
        # NOTE(review): each entry is a (token, count) tuple, so the whole
        # tuple is tested against ``dictionary``.  If ``dictionary`` holds
        # plain words, the intended test is probably ``entry[0] in
        # dictionary`` -- confirm against its definition before changing.
        tokens = [entry for entry in tokens if entry not in dictionary]

    if apply:
        return tokens, text
    return [entry[0] for entry in tokens]
示例2: while
# 需要导入模块: from nltk.probability import FreqDist [as 别名]
# 或者: from nltk.probability.FreqDist import copy [as 别名]
fdict.update(list4)
count+=1
print 'Size of dictionary:',len(fdict)
print ''
f=open('stoplist.txt', 'r')
stoplist=[]
ban='IV'
while(ban!=''):
ban=f.readline()
stoplist.append(ban.strip())
f.close()
banset = set(stoplist)
fdict2=fdict.copy()
for w in fdict.keys()[:]:
if w.strip() in banset or len(w.strip()) < 3 :
del fdict2[w]
elif isinstance(w, unicode):
del fdict2[w]
print 'Size of dictionary',len(fdict2)
print 'Exporting the dictionary...'
fout = open("../classification/bag_nl.dat", "wb")
pickle.dump(fdict2, fout, protocol=0)
fout.close()