This article collects typical usage examples of the Python method pynlpl.statistics.FrequencyList.count. If you are wondering how FrequencyList.count works or how to use it in practice, the curated code examples below may help. They also illustrate, more broadly, how the containing class pynlpl.statistics.FrequencyList is used.
Five code examples of the FrequencyList.count method are shown below, ordered by popularity by default.
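As a quick orientation before the full examples: FrequencyList.count adds one occurrence of an item per call, and also accepts an optional amount so that several occurrences can be added at once (Example 3 below relies on that form). The following is a minimal sketch with an invented token list and filename; the dictionary-style lookup at the end is an assumption about the FrequencyList interface rather than something shown in the examples below.

from pynlpl.statistics import FrequencyList

tokens = "to be or not to be".split()   # invented toy data

freqlist = FrequencyList()
for token in tokens:
    freqlist.count(token)        # one occurrence per call
freqlist.count("be", 3)          # optional amount: add several occurrences at once

freqlist.save("toy.freqlist")    # write the list to disk, as the examples below do
print(freqlist["be"])            # assumed: dictionary-style lookup of the raw count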
Example 1: process
# Required module import: from pynlpl.statistics import FrequencyList [as alias]
# Alternatively: from pynlpl.statistics.FrequencyList import count [as alias]
def process(filename):
    print >>sys.stderr, "Processing " + filename
    doc = folia.Document(file=filename)
    freqlist = FrequencyList()
    if settings.n == 1:
        # Unigrams: count each token individually
        for word in doc.words():
            text = word.toktext()
            if settings.casesensitive: text = text.lower()
            freqlist.count(text)
    elif settings.sentencemarkers:
        # N-grams within each sentence
        for sentence in doc.sentences():
            for ngram in Windower(sentence.words(), settings.n):
                text = ' '.join([x.toktext() for x in ngram])
                if settings.casesensitive: text = text.lower()
                freqlist.count(text)
    else:
        # N-grams over all words in the document, without boundary markers
        for ngram in Windower(doc.words(), settings.n, None, None):
            text = ' '.join([x.toktext() for x in ngram])
            if settings.casesensitive: text = text.lower()
            freqlist.count(text)
    if settings.autooutput:
        # Derive the output filename from the input filename
        if filename[-len(settings.extension) - 1:].lower() == '.' + settings.extension:
            outfilename = filename[:-len(settings.extension) - 1] + '.freqlist'
        else:
            outfilename = filename + '.freqlist'
        freqlist.save(outfilename, True)
    return freqlist
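The process function in Examples 1 and 2 is an excerpt from a larger script: it assumes a module-level settings object plus several imports that are not shown. The following is a rough sketch of that surrounding scaffolding, with attribute names taken from the snippet itself and purely illustrative values and file path.

import sys
from pynlpl.formats import folia
from pynlpl.statistics import FrequencyList
from pynlpl.textprocessors import Windower

class settings:                 # stand-in for the script's settings object
    n = 2                       # n-gram size
    casesensitive = False
    sentencemarkers = False     # if True, count n-grams per sentence
    autooutput = True           # if True, write an automatic .freqlist output file
    extension = 'xml'           # expected input file extension
    ignoreerrors = False        # used by the variant in Example 2

freqlist = process('/path/to/document.xml')   # hypothetical input file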
Example 2: process
# Required module import: from pynlpl.statistics import FrequencyList [as alias]
# Alternatively: from pynlpl.statistics.FrequencyList import count [as alias]
def process(filename):
    try:
        print("Processing " + filename, file=sys.stderr)
        doc = folia.Document(file=filename)
        freqlist = FrequencyList()
        if settings.n == 1:
            # Unigrams: count each token individually
            for word in doc.words():
                text = word.toktext()
                if settings.casesensitive: text = text.lower()
                freqlist.count(text)
        elif settings.sentencemarkers:
            # N-grams within each sentence
            for sentence in doc.sentences():
                for ngram in Windower(sentence.words(), settings.n):
                    text = ' '.join([x.toktext() for x in ngram])
                    if settings.casesensitive: text = text.lower()
                    freqlist.count(text)
        else:
            # N-grams over all words in the document, without boundary markers
            for ngram in Windower(doc.words(), settings.n, None, None):
                text = ' '.join([x.toktext() for x in ngram])
                if settings.casesensitive: text = text.lower()
                freqlist.count(text)
        if settings.autooutput:
            # Derive the output filename from the input filename
            if filename[-len(settings.extension) - 1:].lower() == '.' + settings.extension:
                outfilename = filename[:-len(settings.extension) - 1] + '.freqlist'
            else:
                outfilename = filename + '.freqlist'
            freqlist.save(outfilename, True)
    except Exception as e:
        if settings.ignoreerrors:
            print("ERROR: An exception was raised whilst processing " + filename, e, file=sys.stderr)
        else:
            raise
    return freqlist
Example 3: open
# Required module import: from pynlpl.statistics import FrequencyList [as alias]
# Alternatively: from pynlpl.statistics.FrequencyList import count [as alias]
#!/usr/bin/env python3
import sys
from pynlpl.statistics import FrequencyList

for filename in sys.argv[1:]:
    f_in = open(filename, 'rt', encoding='utf-8')
    freqlist = FrequencyList()
    for line in f_in:
        fields = line.strip().split('\t')
        count = int(fields[1])
        for lemma in fields[0].split(' '):
            freqlist.count(lemma, count)
    f_in.close()
    freqlist.save(filename + '.freqlist')
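Example 3 expects input files in which every line holds a space-separated group of lemmas, then a tab, then a count, and it credits the full line count to each individual lemma. A worked illustration on one invented line:

from pynlpl.statistics import FrequencyList

line = "de het een\t42"           # invented input line: lemmas<TAB>count

freqlist = FrequencyList()
fields = line.strip().split('\t')
count = int(fields[1])            # 42
for lemma in fields[0].split(' '):
    freqlist.count(lemma, count)  # "de", "het" and "een" each receive a count of 42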
Example 4: __init__
# Required module import: from pynlpl.statistics import FrequencyList [as alias]
# Alternatively: from pynlpl.statistics.FrequencyList import count [as alias]
class SimpleLanguageModel:
    """This is a simple unsmoothed language model. This class can both hold and compute the model."""

    def __init__(self, n=2, casesensitive=True, beginmarker="<begin>", endmarker="<end>"):
        self.casesensitive = casesensitive
        self.freqlistN = FrequencyList(None, self.casesensitive)
        self.freqlistNm1 = FrequencyList(None, self.casesensitive)
        assert isinstance(n, int) and n >= 2
        self.n = n
        self.beginmarker = beginmarker
        self.endmarker = endmarker
        self.sentences = 0
        if self.beginmarker:
            self._begingram = tuple([self.beginmarker] * (n - 1))
        if self.endmarker:
            self._endgram = tuple([self.endmarker] * (n - 1))

    def append(self, sentence):
        if isinstance(sentence, str) or isinstance(sentence, unicode):
            sentence = sentence.strip().split(' ')
        self.sentences += 1
        for ngram in Windower(sentence, self.n, self.beginmarker, self.endmarker):
            self.freqlistN.count(ngram)
        for ngram in Windower(sentence, self.n - 1, self.beginmarker, self.endmarker):
            self.freqlistNm1.count(ngram)

    def load(self, filename):
        self.freqlistN = FrequencyList(None, self.casesensitive)
        self.freqlistNm1 = FrequencyList(None, self.casesensitive)
        f = io.open(filename, 'r', encoding='utf-8')
        mode = False
        for line in f.readlines():
            line = line.strip()
            if line:
                if not mode:
                    if line != "[simplelanguagemodel]":
                        raise Exception("File is not a SimpleLanguageModel")
                    else:
                        mode = 1
                elif mode == 1:
                    if line[:2] == 'n=':
                        self.n = int(line[2:])
                    elif line[:12] == 'beginmarker=':
                        self.beginmarker = line[12:]
                    elif line[:10] == 'endmarker=':
                        self.endmarker = line[10:]
                    elif line[:10] == 'sentences=':
                        self.sentences = int(line[10:])
                    elif line[:14] == 'casesensitive=':
                        self.casesensitive = bool(int(line[14:]))
                        self.freqlistN = FrequencyList(None, self.casesensitive)
                        self.freqlistNm1 = FrequencyList(None, self.casesensitive)
                    elif line == "[freqlistN]":
                        mode = 2
                    else:
                        raise Exception("Syntax error in language model file: ", line)
                elif mode == 2:
                    if line == "[freqlistNm1]":
                        mode = 3
                    else:
                        try:
                            type, count = line.split("\t")
                            self.freqlistN.count(type.split(' '), int(count))
                        except:
                            print("Warning, could not parse line whilst loading frequency list: ", line, file=stderr)
                elif mode == 3:
                    try:
                        type, count = line.split("\t")
                        self.freqlistNm1.count(type.split(' '), int(count))
                    except:
                        print("Warning, could not parse line whilst loading frequency list: ", line, file=stderr)
        if self.beginmarker:
            self._begingram = [self.beginmarker] * (self.n - 1)
        if self.endmarker:
            self._endgram = [self.endmarker] * (self.n - 1)

    def save(self, filename):
        f = io.open(filename, 'w', encoding='utf-8')
        f.write("[simplelanguagemodel]\n")
        f.write("n=" + str(self.n) + "\n")
        f.write("sentences=" + str(self.sentences) + "\n")
        f.write("beginmarker=" + self.beginmarker + "\n")
        f.write("endmarker=" + self.endmarker + "\n")
        f.write("casesensitive=" + str(int(self.casesensitive)) + "\n")
        f.write("\n")
        f.write("[freqlistN]\n")
        for line in self.freqlistN.output():
            f.write(line + "\n")
        f.write("[freqlistNm1]\n")
        for line in self.freqlistNm1.output():
            f.write(line + "\n")
        f.close()
    def scoresentence(self, sentence):
        # ......... remainder of this method omitted .........
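A short usage sketch for the class above: it assumes that FrequencyList comes from pynlpl.statistics and Windower from pynlpl.textprocessors, uses invented sentences and an invented filename, and only calls methods defined in the excerpt.

import io
from pynlpl.statistics import FrequencyList
from pynlpl.textprocessors import Windower   # assumed location of Windower

lm = SimpleLanguageModel(n=2)
lm.append("the cat sat on the mat")   # a plain string is split on spaces
lm.append("the dog sat on the log")

lm.save("toy.slm")     # invented filename; writes n, the markers and both frequency lists
lm2 = SimpleLanguageModel(n=2)
lm2.load("toy.slm")    # reload the model into a fresh instance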
Example 5: FrequencyList
# Required module import: from pynlpl.statistics import FrequencyList [as alias]
# Alternatively: from pynlpl.statistics.FrequencyList import count [as alias]
import sys, os
os.environ['PYTHONPATH'] = sys.path[0] + '/../..'

from pynlpl.formats.sonar import CorpusFiles, Corpus
from pynlpl.statistics import FrequencyList

sonardir = sys.argv[1]

freqlist = FrequencyList()
lemmapos_freqlist = FrequencyList()
poshead_freqlist = FrequencyList()
pos_freqlist = FrequencyList()

for i, doc in enumerate(Corpus(sonardir)):
    print >>sys.stderr, "#" + str(i) + " Processing " + doc.filename
    for word, id, pos, lemma in doc:
        freqlist.count(word)
        if lemma and pos:
            poshead = pos.split('(')[0]
            lemmapos_freqlist.count(lemma + '.' + poshead)
            poshead_freqlist.count(poshead)
            pos_freqlist.count(pos)

freqlist.save('sonarfreqlist.txt')
lemmapos_freqlist.save('sonarlemmaposfreqlist.txt')
poshead_freqlist.save('sonarposheadfreqlist.txt')
pos_freqlist.save('sonarposfreqlist.txt')

print unicode(freqlist).encode('utf-8')
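One detail in Example 5 worth spelling out: SONAR part-of-speech tags carry their features in parentheses, so pos.split('(')[0] reduces a full tag to its head category before it is counted in poshead_freqlist. For instance (the tag itself is invented for illustration):

pos = "N(soort,ev,basis,zijd,stan)"   # invented CGN/SONAR-style tag
poshead = pos.split('(')[0]           # "N" -- only the head category is counted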