当前位置: 首页>>代码示例>>Python>>正文


Python FrequencyList.save方法代码示例

本文整理汇总了Python中pynlpl.statistics.FrequencyList.save方法的典型用法代码示例。如果您正苦于以下问题:Python FrequencyList.save方法的具体用法?Python FrequencyList.save怎么用?Python FrequencyList.save使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pynlpl.statistics.FrequencyList的用法示例。


在下文中一共展示了FrequencyList.save方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: process

# 需要导入模块: from pynlpl.statistics import FrequencyList [as 别名]
# 注意: save 是 FrequencyList 实例的方法, 不能单独导入; 请先创建 FrequencyList 对象, 再调用其 save(文件名) 方法
def process(filename):
    """Count word or n-gram frequencies in a single document.

    The document is loaded with ``folia.Document(file=filename)``.  When
    ``settings.n`` is 1 every word of the document is counted; otherwise
    n-grams of size ``settings.n`` are counted sentence by sentence, using the
    Windower's default begin/end markers when ``settings.sentencemarkers`` is
    set and no markers otherwise.  Text is lowercased unless
    ``settings.casesensitive`` is set.

    If ``settings.autooutput`` is set, the list is also saved as
    ``<name>.freqlist``, where a trailing ``.<settings.extension>`` on the
    input filename is replaced rather than appended to.

    Args:
        filename: Path of the document to read.

    Returns:
        The FrequencyList holding the counts for this document.
    """
    sys.stderr.write("Processing " + filename + "\n")
    doc = folia.Document(file=filename)

    freqlist = FrequencyList()

    def normalise(text):
        # Lowercase only in case-INsensitive mode (the test used to be inverted).
        return text if settings.casesensitive else text.lower()

    if settings.n == 1:
        for word in doc.words():
            freqlist.count(normalise(word.toktext()))
    else:
        if settings.sentencemarkers:
            marker_args = ()  # keep the Windower's default begin/end markers
        else:
            marker_args = (None, None)  # explicitly disable both markers
        for sentence in doc.sentences():
            for ngram in Windower(sentence.words(), settings.n, *marker_args):
                # Each window is a sequence of items; word objects expose
                # toktext().  NOTE(review): begin/end markers are presumably
                # plain strings and are passed through unchanged -- confirm
                # against the Windower implementation.
                tokens = [x.toktext() if hasattr(x, 'toktext') else x
                          for x in ngram]
                freqlist.count(normalise(' '.join(tokens)))

    if settings.autooutput:
        suffix = '.' + settings.extension
        if filename.lower().endswith(suffix):
            outfilename = filename[:-len(suffix)] + '.freqlist'
        else:
            # No recognised extension to strip: append to the full name.
            outfilename = filename + '.freqlist'
        freqlist.save(outfilename, True)

    return freqlist
开发者ID:larsmans,项目名称:folia,代码行数:33,代码来源:foliafreqlist.py

示例2: main

# 需要导入模块: from pynlpl.statistics import FrequencyList [as 别名]
# 注意: save 是 FrequencyList 实例的方法, 不能单独导入; 请先创建 FrequencyList 对象, 再调用其 save(文件名) 方法
def main():
    """Command-line entry point: build one frequency list over all inputs.

    Parses the options from ``sys.argv``, stores them on ``settings``, then
    processes every remaining positional argument: directories go through
    ``processdir`` and regular files through ``process``.  The merged list is
    written to the ``-o`` file when given, otherwise printed to stdout.

    Exits with status 2 on a usage error or when no inputs are given, and
    with status 3 when an input path is neither a file nor a directory.

    Raises:
        Exception: for an option that getopt accepts but that has no handler
            below (currently ``-t``, ``-p`` and ``-w``).
    """
    try:
        # 'e:' added: -e is handled below but was missing from the option
        # string, so getopt rejected it before the handler could run.
        opts, args = getopt.getopt(sys.argv[1:], "o:OE:e:htspwrq", ["help"])
    except getopt.GetoptError as err:
        print(str(err),file=sys.stderr)
        usage()
        sys.exit(2)

    outputfile = None

    for o, a in opts:
        if o == '-h' or o == '--help':
            usage()
            sys.exit(0)
        elif o == '-e':
            settings.encoding = a
        elif o == '-E':
            settings.extension = a
        elif o == '-o':
            outputfile = a
        elif o == '-O':
            settings.autooutput = True
        elif o == '-s':
            settings.sentencemarkers = True
        elif o == '-r':
            settings.recurse = True
        elif o == '-q':
            settings.ignoreerrors = True
        else:
            raise Exception("No such option: " + o)

    # Use the positional arguments left over by getopt; iterating over
    # sys.argv[1:] would treat option flags (and their values) as input paths.
    if not args:
        print("ERROR: No files specified",file=sys.stderr)
        sys.exit(2)

    outstream = None
    if outputfile:
        outstream = io.open(outputfile,'w',encoding=settings.encoding)

    try:
        freqlist = FrequencyList()
        for x in args:
            if os.path.isdir(x):
                processdir(x,freqlist)
            elif os.path.isfile(x):
                freqlist += process(x)
            else:
                print("ERROR: File or directory not found: " + x,file=sys.stderr)
                sys.exit(3)

        # Emit the same tab-separated lines to either destination.
        # NOTE(review): this replaces freqlist.save(<open file>, True);
        # save() appears to expect a filename rather than a file object --
        # confirm against pynlpl.
        for line in freqlist.output("\t", True):
            if outstream:
                outstream.write(line + "\n")
            else:
                print(line)
    finally:
        # Close the output file even when we leave through sys.exit() or an error.
        if outstream:
            outstream.close()
开发者ID:Sandy4321,项目名称:folia,代码行数:55,代码来源:foliafreqlist.py

示例3: process

# 需要导入模块: from pynlpl.statistics import FrequencyList [as 别名]
# 注意: save 是 FrequencyList 实例的方法, 不能单独导入; 请先创建 FrequencyList 对象, 再调用其 save(文件名) 方法
def process(filename):
    """Count word or n-gram frequencies in a single document.

    The document is loaded with ``folia.Document(file=filename)``.  When
    ``settings.n`` is 1 every word of the document is counted; otherwise
    n-grams of size ``settings.n`` are counted sentence by sentence, using the
    Windower's default begin/end markers when ``settings.sentencemarkers`` is
    set and no markers otherwise.  Text is lowercased unless
    ``settings.casesensitive`` is set.

    If ``settings.autooutput`` is set, the list is also saved as
    ``<name>.freqlist``, where a trailing ``.<settings.extension>`` on the
    input filename is replaced rather than appended to.

    Args:
        filename: Path of the document to read.

    Returns:
        The FrequencyList with the counts for this document.  When an error
        is swallowed because ``settings.ignoreerrors`` is set, the list holds
        whatever was counted before the failure (possibly nothing).

    Raises:
        Exception: any error raised while processing is re-raised unless
            ``settings.ignoreerrors`` is set.
    """
    # Created before the try block so there is always a list to return, even
    # when loading the document fails and the error is ignored.
    freqlist = FrequencyList()

    def normalise(text):
        # Lowercase only in case-INsensitive mode (the test used to be inverted).
        return text if settings.casesensitive else text.lower()

    try:
        print("Processing " + filename,file=sys.stderr)
        doc = folia.Document(file=filename)

        if settings.n == 1:
            for word in doc.words():
                freqlist.count(normalise(word.toktext()))
        else:
            if settings.sentencemarkers:
                marker_args = ()  # keep the Windower's default begin/end markers
            else:
                marker_args = (None, None)  # explicitly disable both markers
            for sentence in doc.sentences():
                for ngram in Windower(sentence.words(), settings.n, *marker_args):
                    # Each window is a sequence of items; word objects expose
                    # toktext().  NOTE(review): begin/end markers are
                    # presumably plain strings and are passed through
                    # unchanged -- confirm against the Windower implementation.
                    tokens = [x.toktext() if hasattr(x, 'toktext') else x
                              for x in ngram]
                    freqlist.count(normalise(' '.join(tokens)))

        if settings.autooutput:
            suffix = '.' + settings.extension
            if filename.lower().endswith(suffix):
                outfilename = filename[:-len(suffix)] + '.freqlist'
            else:
                # No recognised extension to strip: append to the full name.
                outfilename = filename + '.freqlist'
            freqlist.save(outfilename,True)
    except Exception as e:
        if settings.ignoreerrors:
            print("ERROR: An exception was raised whilst processing " + filename, e,file=sys.stderr)
        else:
            raise

    return freqlist
开发者ID:Sandy4321,项目名称:folia,代码行数:39,代码来源:foliafreqlist.py

示例4: open

# 需要导入模块: from pynlpl.statistics import FrequencyList [as 别名]
# 注意: save 是 FrequencyList 实例的方法, 不能单独导入; 请先创建 FrequencyList 对象, 再调用其 save(文件名) 方法
#!/usr/bin/env python3

import sys
from pynlpl.statistics import FrequencyList

# For every input file given on the command line, build a frequency list and
# save it next to the input as "<filename>.freqlist".
# Each input line has two tab-separated fields: a space-separated list of
# items, then an integer count that is added to every item on that line.
for filename in sys.argv[1:]:
    freqlist = FrequencyList()
    # The context manager closes the file even if a malformed line raises.
    with open(filename, 'rt', encoding='utf-8') as f_in:
        for line in f_in:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            fields = line.split('\t')
            count = int(fields[1])
            for lemma in fields[0].split(' '):
                freqlist.count(lemma, count)
    freqlist.save(filename + '.freqlist')



开发者ID:pombredanne,项目名称:nlpsandbox,代码行数:17,代码来源:searchdidier_freqlist.py

示例5: FrequencyList

# 需要导入模块: from pynlpl.statistics import FrequencyList [as 别名]
# 注意: save 是 FrequencyList 实例的方法, 不能单独导入; 请先创建 FrequencyList 对象, 再调用其 save(文件名) 方法
    os.environ['PYTHONPATH'] = sys.path[0] + '/../..'

from pynlpl.formats.sonar import CorpusFiles, Corpus
from pynlpl.statistics import FrequencyList

sonardir = sys.argv[1]


freqlist = FrequencyList()
lemmapos_freqlist = FrequencyList()
poshead_freqlist = FrequencyList()
pos_freqlist = FrequencyList()

for i, doc in enumerate(Corpus(sonardir)):
    print >>sys.stderr, "#" + str(i) + " Processing " + doc.filename
    for word, id, pos, lemma in doc:
        freqlist.count(word)
        if lemma and pos:
            poshead = pos.split('(')[0]
            lemmapos_freqlist.count(lemma+'.'+poshead)
            poshead_freqlist.count(poshead)
            pos_freqlist.count(pos)

freqlist.save('sonarfreqlist.txt')
lemmapos_freqlist.save('sonarlemmaposfreqlist.txt')
poshead_freqlist.save('sonarposheadfreqlist.txt')
pos_freqlist.save('sonarposfreqlist.txt')

            
print unicode(freqlist).encode('utf-8')
开发者ID:Acidburn0zzz,项目名称:pynlpl,代码行数:32,代码来源:sonarfreqlist.py

示例6: Exception

# 需要导入模块: from pynlpl.statistics import FrequencyList [as 别名]
# 注意: save 是 FrequencyList 实例的方法, 不能单独导入; 请先创建 FrequencyList 对象, 再调用其 save(文件名) 方法
            settings.sentencemarkers = True
        elif o == '-r':
            settings.recurse = True
        else:            
            raise Exception("No such option: " + o)
                
    
    if outputfile: outputfile = codecs.open(outputfile,'w',settings.encoding)
        
    if len(sys.argv) >= 2:    
        freqlist = FrequencyList()
        for x in sys.argv[1:]:
            if os.path.isdir(x):
                processdir(x,freqlist)
            elif os.path.isfile(x):
                freqlist += process(x)
            else:
                print >>sys.stderr, "ERROR: File or directory not found: " + x
                sys.exit(3)    
        if outputfile:
            freqlist.save(outputfile, True)
        else:
            for line in freqlist.output("\t", True):
                print line
    else:
        print >>sys.stderr,"ERROR: No files specified"
        sys.exit(2)
            
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
开发者ID:larsmans,项目名称:folia,代码行数:32,代码来源:foliafreqlist.py


注:本文中的pynlpl.statistics.FrequencyList.save方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。