本文整理匯總了Python中pynlpl.statistics.FrequencyList.save方法的典型用法代碼示例。如果您正苦於以下問題：Python FrequencyList.save方法的具體用法？Python FrequencyList.save怎麼用？Python FrequencyList.save使用的例子？那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類pynlpl.statistics.FrequencyList的用法示例。
在下文中一共展示了FrequencyList.save方法的6個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點贊，您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: process
# 需要導入模塊: from pynlpl.statistics import FrequencyList [as 別名]
# 或者: from pynlpl.statistics.FrequencyList import save [as 別名]
def process(filename):
    """Count the words or n-grams of one FoLiA document.

    Loads ``filename`` with ``folia.Document`` and counts, depending on
    ``settings.n`` and ``settings.sentencemarkers``:

    * ``settings.n == 1``: the ``toktext()`` of every word;
    * sentence markers enabled: n-grams per sentence, built by ``Windower``
      with its default markers;
    * otherwise: n-grams over all words of the document, with both markers
      passed as ``None``.

    Text is lower-cased unless ``settings.casesensitive`` is set.  When
    ``settings.autooutput`` is set, the list is also saved to a
    ``.freqlist`` file whose name is derived from ``filename``.

    Returns the ``FrequencyList`` built for this document.
    """
    # sys.stderr.write works on both Python 2 and 3, unlike ``print >>``.
    sys.stderr.write("Processing " + filename + "\n")
    doc = folia.Document(file=filename)
    freqlist = FrequencyList()

    def token_text(item):
        # Windower items are normally word objects; begin/end markers are
        # presumably plain strings without toktext() -- TODO confirm against
        # pynlpl.textprocessors.Windower.
        return item.toktext() if hasattr(item, 'toktext') else item

    def count(text):
        # Fixed inverted test: the original lower-cased the text when
        # casesensitive was *set*.
        if not settings.casesensitive:
            text = text.lower()
        freqlist.count(text)

    if settings.n == 1:
        for word in doc.words():
            count(word.toktext())
    elif settings.sentencemarkers:
        for sentence in doc.sentences():
            for ngram in Windower(sentence.words(), settings.n):
                # Fixed: toktext() belongs to each item, not to the window.
                count(' '.join(token_text(x) for x in ngram))
    else:
        # Fixed: the original referenced ``sentence`` and ``ngram`` here,
        # neither of which is bound in this branch.
        for ngram in Windower(doc.words(), settings.n, None, None):
            count(' '.join(token_text(x) for x in ngram))

    if settings.autooutput:
        suffix = '.' + settings.extension
        if filename[-len(suffix):].lower() == suffix:
            outfilename = filename[:-len(suffix)] + '.freqlist'
        else:
            # Fixed: ``outfilename += ...`` read an unassigned local.
            outfilename = filename + '.freqlist'
        freqlist.save(outfilename, True)

    return freqlist
示例2: main
# 需要導入模塊: from pynlpl.statistics import FrequencyList [as 別名]
# 或者: from pynlpl.statistics.FrequencyList import save [as 別名]
def main():
    """Command-line entry point: build one frequency list from files/dirs.

    Options handled here:
      -h, --help   print usage and exit with status 0
      -e ENC       set ``settings.encoding``
      -E EXT       set ``settings.extension``
      -o FILE      write the combined list to FILE instead of stdout
      -O           set ``settings.autooutput``
      -s           set ``settings.sentencemarkers``
      -r           set ``settings.recurse``
      -q           set ``settings.ignoreerrors``

    Every remaining argument is a file (handled by ``process``) or a
    directory (handled by ``processdir``).

    Exits with status 2 on an option error or when no paths are given, and
    with status 3 when a path is neither a file nor a directory.  Raises
    ``Exception`` for an option that getopt accepts but that has no handler.
    """
    try:
        # "e:" added: -e is handled below but was missing from the option
        # string, so getopt rejected it before the branch could run.
        opts, args = getopt.getopt(sys.argv[1:], "o:OE:e:htspwrq", ["help"])
    except getopt.GetoptError as err:
        print(str(err), file=sys.stderr)
        usage()
        sys.exit(2)

    outputfile = None
    for o, a in opts:
        if o in ('-h', '--help'):
            usage()
            sys.exit(0)
        elif o == '-e':
            settings.encoding = a
        elif o == '-E':
            settings.extension = a
        elif o == '-o':
            outputfile = a
        elif o == '-O':
            settings.autooutput = True
        elif o == '-s':
            settings.sentencemarkers = True
        elif o == '-r':
            settings.recurse = True
        elif o == '-q':
            settings.ignoreerrors = True
        else:
            raise Exception("No such option: " + o)

    # Fixed: use getopt's leftover ``args``.  Iterating sys.argv[1:] treated
    # option flags and their values (e.g. "-o out.txt") as input paths.
    if not args:
        print("ERROR: No files specified", file=sys.stderr)
        sys.exit(2)

    freqlist = FrequencyList()
    for x in args:
        if os.path.isdir(x):
            processdir(x, freqlist)
        elif os.path.isfile(x):
            freqlist += process(x)
        else:
            print("ERROR: File or directory not found: " + x, file=sys.stderr)
            sys.exit(3)

    if outputfile:
        # Write the same lines the stdout branch prints.  The original opened
        # a handle early, never closed it, and handed it to save(), which all
        # other call sites give a filename.  Opening here honours
        # settings.encoding and closes the file even on error.
        with io.open(outputfile, 'w', encoding=settings.encoding) as f_out:
            for line in freqlist.output("\t", True):
                f_out.write(line + "\n")
    else:
        for line in freqlist.output("\t", True):
            print(line)
示例3: process
# 需要導入模塊: from pynlpl.statistics import FrequencyList [as 別名]
# 或者: from pynlpl.statistics.FrequencyList import save [as 別名]
def process(filename):
    """Count the words or n-grams of one FoLiA document.

    Loads ``filename`` with ``folia.Document`` and counts, depending on
    ``settings.n`` and ``settings.sentencemarkers``:

    * ``settings.n == 1``: the ``toktext()`` of every word;
    * sentence markers enabled: n-grams per sentence, built by ``Windower``
      with its default markers;
    * otherwise: n-grams over all words of the document, with both markers
      passed as ``None``.

    Text is lower-cased unless ``settings.casesensitive`` is set.  When
    ``settings.autooutput`` is set, the list is also saved to a
    ``.freqlist`` file whose name is derived from ``filename``.

    Any exception is re-raised unless ``settings.ignoreerrors`` is set, in
    which case it is reported on stderr and whatever was counted so far
    (possibly nothing) is returned.

    Returns the ``FrequencyList`` built for this document.
    """
    # Created before the try block so the final return cannot hit an unbound
    # name when an ignored error occurs while loading the document.
    freqlist = FrequencyList()

    def token_text(item):
        # Windower items are normally word objects; begin/end markers are
        # presumably plain strings without toktext() -- TODO confirm against
        # pynlpl.textprocessors.Windower.
        return item.toktext() if hasattr(item, 'toktext') else item

    def count(text):
        # Fixed inverted test: the original lower-cased the text when
        # casesensitive was *set*.
        if not settings.casesensitive:
            text = text.lower()
        freqlist.count(text)

    try:
        print("Processing " + filename, file=sys.stderr)
        doc = folia.Document(file=filename)

        if settings.n == 1:
            for word in doc.words():
                count(word.toktext())
        elif settings.sentencemarkers:
            for sentence in doc.sentences():
                for ngram in Windower(sentence.words(), settings.n):
                    # Fixed: toktext() belongs to each item, not the window.
                    count(' '.join(token_text(x) for x in ngram))
        else:
            # Fixed: the original referenced ``sentence`` and ``ngram`` here,
            # neither of which is bound in this branch.
            for ngram in Windower(doc.words(), settings.n, None, None):
                count(' '.join(token_text(x) for x in ngram))

        if settings.autooutput:
            suffix = '.' + settings.extension
            if filename[-len(suffix):].lower() == suffix:
                outfilename = filename[:-len(suffix)] + '.freqlist'
            else:
                # Fixed: ``outfilename += ...`` read an unassigned local.
                outfilename = filename + '.freqlist'
            freqlist.save(outfilename, True)
    except Exception as e:
        if settings.ignoreerrors:
            print("ERROR: An exception was raised whilst processing " + filename, e, file=sys.stderr)
        else:
            raise

    return freqlist
示例4: open
# 需要導入模塊: from pynlpl.statistics import FrequencyList [as 別名]
# 或者: from pynlpl.statistics.FrequencyList import save [as 別名]
#!/usr/bin/env python3
import sys
from pynlpl.statistics import FrequencyList
# For every file named on the command line, read tab-separated lines of the
# form "<space-separated lemmas>\t<count>" and save a per-lemma frequency list
# next to the input as "<filename>.freqlist".
for filename in sys.argv[1:]:
    freqlist = FrequencyList()
    # The context manager closes the input even when a line fails to parse.
    with open(filename, 'rt', encoding='utf-8') as f_in:
        for line in f_in:
            line = line.strip()
            if not line:
                # Blank lines carry no data and would otherwise crash on
                # fields[1].
                continue
            fields = line.split('\t')
            count = int(fields[1])
            # Each lemma of a multi-word entry is credited with the full count.
            for lemma in fields[0].split(' '):
                freqlist.count(lemma, count)
    freqlist.save(filename + '.freqlist')
示例5: FrequencyList
# 需要導入模塊: from pynlpl.statistics import FrequencyList [as 別名]
# 或者: from pynlpl.statistics.FrequencyList import save [as 別名]
# Point PYTHONPATH two directories above the script's own directory.
# NOTE(review): PYTHONPATH is read at interpreter start-up, so assigning it
# here does not change sys.path of the running process; it is only inherited
# by child processes.  If the intent is to make the pynlpl imports that follow
# resolve from a source checkout, sys.path would have to be modified instead
# -- confirm intent.
os.environ['PYTHONPATH'] = sys.path[0] + '/../..'
from pynlpl.formats.sonar import CorpusFiles, Corpus
from pynlpl.statistics import FrequencyList
# Walk the corpus directory given as the first command-line argument and
# tally four frequency lists: word forms, lemma.POS-head pairs, POS heads and
# full POS tags.  Each list is saved to its own text file and the word list is
# finally written to stdout as UTF-8.  (Python 2 script: relies on
# ``unicode``.)
sonardir = sys.argv[1]

freqlist = FrequencyList()
lemmapos_freqlist = FrequencyList()
poshead_freqlist = FrequencyList()
pos_freqlist = FrequencyList()

for docnum, doc in enumerate(Corpus(sonardir)):
    sys.stderr.write("#" + str(docnum) + " Processing " + doc.filename + "\n")
    for word, _id, pos, lemma in doc:
        freqlist.count(word)
        if not (lemma and pos):
            continue
        # NOTE(review): the nesting is reconstructed; all three POS-based
        # counts are assumed to apply only when both lemma and pos are set.
        poshead = pos.split('(')[0]
        lemmapos_freqlist.count(lemma + '.' + poshead)
        poshead_freqlist.count(poshead)
        pos_freqlist.count(pos)

for target, outname in (
    (freqlist, 'sonarfreqlist.txt'),
    (lemmapos_freqlist, 'sonarlemmaposfreqlist.txt'),
    (poshead_freqlist, 'sonarposheadfreqlist.txt'),
    (pos_freqlist, 'sonarposfreqlist.txt'),
):
    target.save(outname)

sys.stdout.write(unicode(freqlist).encode('utf-8') + "\n")
示例6: Exception
# 需要導入模塊: from pynlpl.statistics import FrequencyList [as 別名]
# 或者: from pynlpl.statistics.FrequencyList import save [as 別名]
settings.sentencemarkers = True
elif o == '-r':
settings.recurse = True
else:
raise Exception("No such option: " + o)
if outputfile: outputfile = codecs.open(outputfile,'w',settings.encoding)
if len(sys.argv) >= 2:
freqlist = FrequencyList()
for x in sys.argv[1:]:
if os.path.isdir(x):
processdir(x,freqlist)
elif os.path.isfile(x):
freqlist += process(x)
else:
print >>sys.stderr, "ERROR: File or directory not found: " + x
sys.exit(3)
if outputfile:
freqlist.save(outputfile, True)
else:
for line in freqlist.output("\t", True):
print line
else:
print >>sys.stderr,"ERROR: No files specified"
sys.exit(2)
if __name__ == "__main__":
main()