當前位置: 首頁>>代碼示例>>Python>>正文


Python FrequencyList.save方法代碼示例

本文整理匯總了Python中pynlpl.statistics.FrequencyList.save方法的典型用法代碼示例。如果您正苦於以下問題:Python FrequencyList.save方法的具體用法?Python FrequencyList.save怎麽用?Python FrequencyList.save使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在pynlpl.statistics.FrequencyList的用法示例。


在下文中一共展示了FrequencyList.save方法的6個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: process

# 需要導入模塊: from pynlpl.statistics import FrequencyList [as 別名]
# 或者: from pynlpl.statistics.FrequencyList import save [as 別名]
def process(filename):
    print >>sys.stderr, "Processing " + filename
    doc = folia.Document(file=filename)

    freqlist = FrequencyList()
    
    if settings.n == 1:
        for word in doc.words():
            text = word.toktext()
            if settings.casesensitive: text = text.lower()
            freqlist.count(text)
    elif settings.sentencemarkers:
        for sentence in doc.sentences():
            for ngram in Windower(sentence.words(), settings.n):
                text = ' '.join([x for x in ngram.toktext() ])
                if settings.casesensitive: text = text.lower()
                freqlist.count(text)                
    else:
        for word in Windower(sentence.words(), settings.n, None, None):
            text = ' '.join([x for x in ngram.toktext() ])
            if settings.casesensitive: text = text.lower()
            freqlist.count(text)                        
                
    if settings.autooutput:                
        if filename[-len(settings.extension) - 1:].lower() == '.' +settings.extension:
            outfilename = filename[:-len(settings.extension) - 1] + '.freqlist'
        else:
            outfilename += '.freqlist'
        freqlist.save(outfilename,True)    
                                       
    return freqlist
開發者ID:larsmans,項目名稱:folia,代碼行數:33,代碼來源:foliafreqlist.py

示例2: main

# 需要導入模塊: from pynlpl.statistics import FrequencyList [as 別名]
# 或者: from pynlpl.statistics.FrequencyList import save [as 別名]
def main():
    try:
        opts, args = getopt.getopt(sys.argv[1:], "o:OE:htspwrq", ["help"])
    except getopt.GetoptError as err:
        print(str(err),file=sys.stderr)
        usage()
        sys.exit(2)

    outputfile = None


    for o, a in opts:
        if o == '-h' or o == '--help':
            usage()
            sys.exit(0)
        elif o == '-e':
            settings.encoding = a
        elif o == '-E':
            settings.extension = a
        elif o == '-o':
            outputfile = a
        elif o == '-O':
            settings.autooutput = True
        elif o == '-s':
            settings.sentencemarkers = True
        elif o == '-r':
            settings.recurse = True
        elif o == '-q':
            settings.ignoreerrors = True
        else:
            raise Exception("No such option: " + o)


    if outputfile: outputfile = io.open(outputfile,'w',encoding=settings.encoding)

    if len(sys.argv) >= 2:
        freqlist = FrequencyList()
        for x in sys.argv[1:]:
            if os.path.isdir(x):
                processdir(x,freqlist)
            elif os.path.isfile(x):
                freqlist += process(x)
            else:
                print("ERROR: File or directory not found: " + x,file=sys.stderr)
                sys.exit(3)
        if outputfile:
            freqlist.save(outputfile, True)
        else:
            for line in freqlist.output("\t", True):
                print(line)
    else:
        print("ERROR: No files specified",file=sys.stderr)
        sys.exit(2)
開發者ID:Sandy4321,項目名稱:folia,代碼行數:55,代碼來源:foliafreqlist.py

示例3: process

# 需要導入模塊: from pynlpl.statistics import FrequencyList [as 別名]
# 或者: from pynlpl.statistics.FrequencyList import save [as 別名]
def process(filename):
    try:
        print("Processing " + filename,file=sys.stderr)
        doc = folia.Document(file=filename)

        freqlist = FrequencyList()

        if settings.n == 1:
            for word in doc.words():
                text = word.toktext()
                if settings.casesensitive: text = text.lower()
                freqlist.count(text)
        elif settings.sentencemarkers:
            for sentence in doc.sentences():
                for ngram in Windower(sentence.words(), settings.n):
                    text = ' '.join([x for x in ngram.toktext() ])
                    if settings.casesensitive: text = text.lower()
                    freqlist.count(text)
        else:
            for word in Windower(sentence.words(), settings.n, None, None):
                text = ' '.join([x for x in ngram.toktext() ])
                if settings.casesensitive: text = text.lower()
                freqlist.count(text)

        if settings.autooutput:
            if filename[-len(settings.extension) - 1:].lower() == '.' +settings.extension:
                outfilename = filename[:-len(settings.extension) - 1] + '.freqlist'
            else:
                outfilename += '.freqlist'
            freqlist.save(outfilename,True)
    except Exception as e:
        if settings.ignoreerrors:
            print("ERROR: An exception was raised whilst processing " + filename, e,file=sys.stderr)
        else:
            raise

    return freqlist
開發者ID:Sandy4321,項目名稱:folia,代碼行數:39,代碼來源:foliafreqlist.py

示例4: open

# 需要導入模塊: from pynlpl.statistics import FrequencyList [as 別名]
# 或者: from pynlpl.statistics.FrequencyList import save [as 別名]
#!/usr/bin/env python3

import sys
from pynlpl.statistics import FrequencyList

for filename in sys.argv[1:]:
    f_in = open(filename,'rt',encoding='utf-8')
    freqlist = FrequencyList()
    for line in f_in:
        fields = line.strip().split('\t')
        count = int(fields[1])
        for lemma in fields[0].split(' '):
            freqlist.count(lemma, count)
    f_in.close()
    freqlist.save(filename + '.freqlist')



開發者ID:pombredanne,項目名稱:nlpsandbox,代碼行數:17,代碼來源:searchdidier_freqlist.py

示例5: FrequencyList

# 需要導入模塊: from pynlpl.statistics import FrequencyList [as 別名]
# 或者: from pynlpl.statistics.FrequencyList import save [as 別名]
    os.environ['PYTHONPATH'] = sys.path[0] + '/../..'

from pynlpl.formats.sonar import CorpusFiles, Corpus
from pynlpl.statistics import FrequencyList

sonardir = sys.argv[1]


freqlist = FrequencyList()
lemmapos_freqlist = FrequencyList()
poshead_freqlist = FrequencyList()
pos_freqlist = FrequencyList()

for i, doc in enumerate(Corpus(sonardir)):
    print >>sys.stderr, "#" + str(i) + " Processing " + doc.filename
    for word, id, pos, lemma in doc:
        freqlist.count(word)
        if lemma and pos:
            poshead = pos.split('(')[0]
            lemmapos_freqlist.count(lemma+'.'+poshead)
            poshead_freqlist.count(poshead)
            pos_freqlist.count(pos)

freqlist.save('sonarfreqlist.txt')
lemmapos_freqlist.save('sonarlemmaposfreqlist.txt')
poshead_freqlist.save('sonarposheadfreqlist.txt')
pos_freqlist.save('sonarposfreqlist.txt')

            
print unicode(freqlist).encode('utf-8')
開發者ID:Acidburn0zzz,項目名稱:pynlpl,代碼行數:32,代碼來源:sonarfreqlist.py

示例6: Exception

# 需要導入模塊: from pynlpl.statistics import FrequencyList [as 別名]
# 或者: from pynlpl.statistics.FrequencyList import save [as 別名]
            settings.sentencemarkers = True
        elif o == '-r':
            settings.recurse = True
        else:            
            raise Exception("No such option: " + o)
                
    
    if outputfile: outputfile = codecs.open(outputfile,'w',settings.encoding)
        
    if len(sys.argv) >= 2:    
        freqlist = FrequencyList()
        for x in sys.argv[1:]:
            if os.path.isdir(x):
                processdir(x,freqlist)
            elif os.path.isfile(x):
                freqlist += process(x)
            else:
                print >>sys.stderr, "ERROR: File or directory not found: " + x
                sys.exit(3)    
        if outputfile:
            freqlist.save(outputfile, True)
        else:
            for line in freqlist.output("\t", True):
                print line
    else:
        print >>sys.stderr,"ERROR: No files specified"
        sys.exit(2)
            
if __name__ == "__main__":
    main()    
開發者ID:larsmans,項目名稱:folia,代碼行數:32,代碼來源:foliafreqlist.py


注:本文中的pynlpl.statistics.FrequencyList.save方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。