当前位置: 首页>>代码示例>>Python>>正文


Python FrequencyList.count方法代码示例

本文整理汇总了Python中pynlpl.statistics.FrequencyList.count方法的典型用法代码示例。如果您正苦于以下问题:Python FrequencyList.count方法的具体用法?Python FrequencyList.count怎么用?Python FrequencyList.count使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pynlpl.statistics.FrequencyList的用法示例。


在下文中一共展示了FrequencyList.count方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: process

# 需要导入模块: from pynlpl.statistics import FrequencyList [as 别名]
# 或者: from pynlpl.statistics.FrequencyList import count [as 别名]
def process(filename):
    print >>sys.stderr, "Processing " + filename
    doc = folia.Document(file=filename)

    freqlist = FrequencyList()
    
    if settings.n == 1:
        for word in doc.words():
            text = word.toktext()
            if settings.casesensitive: text = text.lower()
            freqlist.count(text)
    elif settings.sentencemarkers:
        for sentence in doc.sentences():
            for ngram in Windower(sentence.words(), settings.n):
                text = ' '.join([x for x in ngram.toktext() ])
                if settings.casesensitive: text = text.lower()
                freqlist.count(text)                
    else:
        for word in Windower(sentence.words(), settings.n, None, None):
            text = ' '.join([x for x in ngram.toktext() ])
            if settings.casesensitive: text = text.lower()
            freqlist.count(text)                        
                
    if settings.autooutput:                
        if filename[-len(settings.extension) - 1:].lower() == '.' +settings.extension:
            outfilename = filename[:-len(settings.extension) - 1] + '.freqlist'
        else:
            outfilename += '.freqlist'
        freqlist.save(outfilename,True)    
                                       
    return freqlist
开发者ID:larsmans,项目名称:folia,代码行数:33,代码来源:foliafreqlist.py

示例2: process

# 需要导入模块: from pynlpl.statistics import FrequencyList [as 别名]
# 或者: from pynlpl.statistics.FrequencyList import count [as 别名]
def process(filename):
    try:
        print("Processing " + filename,file=sys.stderr)
        doc = folia.Document(file=filename)

        freqlist = FrequencyList()

        if settings.n == 1:
            for word in doc.words():
                text = word.toktext()
                if settings.casesensitive: text = text.lower()
                freqlist.count(text)
        elif settings.sentencemarkers:
            for sentence in doc.sentences():
                for ngram in Windower(sentence.words(), settings.n):
                    text = ' '.join([x for x in ngram.toktext() ])
                    if settings.casesensitive: text = text.lower()
                    freqlist.count(text)
        else:
            for word in Windower(sentence.words(), settings.n, None, None):
                text = ' '.join([x for x in ngram.toktext() ])
                if settings.casesensitive: text = text.lower()
                freqlist.count(text)

        if settings.autooutput:
            if filename[-len(settings.extension) - 1:].lower() == '.' +settings.extension:
                outfilename = filename[:-len(settings.extension) - 1] + '.freqlist'
            else:
                outfilename += '.freqlist'
            freqlist.save(outfilename,True)
    except Exception as e:
        if settings.ignoreerrors:
            print("ERROR: An exception was raised whilst processing " + filename, e,file=sys.stderr)
        else:
            raise

    return freqlist
开发者ID:Sandy4321,项目名称:folia,代码行数:39,代码来源:foliafreqlist.py

示例3: open

# 需要导入模块: from pynlpl.statistics import FrequencyList [as 别名]
# 或者: from pynlpl.statistics.FrequencyList import count [as 别名]
#!/usr/bin/env python3

import sys
from pynlpl.statistics import FrequencyList

for filename in sys.argv[1:]:
    f_in = open(filename,'rt',encoding='utf-8')
    freqlist = FrequencyList()
    for line in f_in:
        fields = line.strip().split('\t')
        count = int(fields[1])
        for lemma in fields[0].split(' '):
            freqlist.count(lemma, count)
    f_in.close()
    freqlist.save(filename + '.freqlist')



开发者ID:pombredanne,项目名称:nlpsandbox,代码行数:17,代码来源:searchdidier_freqlist.py

示例4: __init__

# 需要导入模块: from pynlpl.statistics import FrequencyList [as 别名]
# 或者: from pynlpl.statistics.FrequencyList import count [as 别名]
class SimpleLanguageModel:
    """This is a simple unsmoothed language model. This class can both hold and compute the model."""

    def __init__(self, n=2, casesensitive = True, beginmarker = "<begin>", endmarker = "<end>"):
        self.casesensitive = casesensitive
        self.freqlistN = FrequencyList(None, self.casesensitive)
        self.freqlistNm1 = FrequencyList(None, self.casesensitive)

        assert isinstance(n,int) and n >= 2
        self.n = n
        self.beginmarker = beginmarker
        self.endmarker = endmarker
        self.sentences = 0

        if self.beginmarker:
            self._begingram = tuple([self.beginmarker] * (n-1))
        if self.endmarker:
            self._endgram = tuple([self.endmarker] * (n-1))

    def append(self, sentence):
        if isinstance(sentence, str) or isinstance(sentence, unicode):
            sentence = sentence.strip().split(' ')
        self.sentences += 1
        for ngram in Windower(sentence,self.n, self.beginmarker, self.endmarker):
            self.freqlistN.count(ngram)
        for ngram in Windower(sentence,self.n-1, self.beginmarker, self.endmarker):
            self.freqlistNm1.count(ngram)


    def load(self, filename):
        self.freqlistN = FrequencyList(None, self.casesensitive)
        self.freqlistNm1 = FrequencyList(None, self.casesensitive)
        f = io.open(filename,'r',encoding='utf-8')
        mode = False
        for line in f.readlines():
            line = line.strip()
            if line:
                if not mode:
                    if line != "[simplelanguagemodel]":
                        raise Exception("File is not a SimpleLanguageModel")
                    else:
                        mode = 1
                elif mode == 1:
                    if line[:2] == 'n=':
                        self.n = int(line[2:])
                    elif line[:12] == 'beginmarker=':
                        self.beginmarker = line[12:]
                    elif line[:10] == 'endmarker=':
                        self.endmarker = line[10:]
                    elif line[:10] == 'sentences=':
                        self.sentences = int(line[10:])
                    elif line[:14] == 'casesensitive=':
                        self.casesensitive = bool(int(line[14:]))
                        self.freqlistN = FrequencyList(None, self.casesensitive)
                        self.freqlistNm1 = FrequencyList(None, self.casesensitive)
                    elif line == "[freqlistN]":
                        mode = 2
                    else:
                        raise Exception("Syntax error in language model file: ", line)
                elif mode == 2:
                    if line == "[freqlistNm1]":
                        mode = 3
                    else:
                        try:
                            type, count = line.split("\t")
                            self.freqlistN.count(type.split(' '),int(count))
                        except:
                            print("Warning, could not parse line whilst loading frequency list: ", line,file=stderr)
                elif mode == 3:
                        try:
                            type, count = line.split("\t")
                            self.freqlistNm1.count(type.split(' '),int(count))
                        except:
                            print("Warning, could not parse line whilst loading frequency list: ", line,file=stderr)

        if self.beginmarker:
            self._begingram = [self.beginmarker] * (self.n-1)
        if self.endmarker:
            self._endgram = [self.endmarker] * (self.n-1)


    def save(self, filename):
        f = io.open(filename,'w',encoding='utf-8')
        f.write("[simplelanguagemodel]\n")
        f.write("n="+str(self.n)+"\n")
        f.write("sentences="+str(self.sentences)+"\n")
        f.write("beginmarker="+self.beginmarker+"\n")
        f.write("endmarker="+self.endmarker+"\n")
        f.write("casesensitive="+str(int(self.casesensitive))+"\n")
        f.write("\n")
        f.write("[freqlistN]\n")
        for line in self.freqlistN.output():
            f.write(line+"\n")
        f.write("[freqlistNm1]\n")
        for line in self.freqlistNm1.output():
            f.write(line+"\n")
        f.close()


    def scoresentence(self, sentence):
#.........这里部分代码省略.........
开发者ID:amitbeka,项目名称:pynlpl,代码行数:103,代码来源:lm.py

示例5: FrequencyList

# 需要导入模块: from pynlpl.statistics import FrequencyList [as 别名]
# 或者: from pynlpl.statistics.FrequencyList import count [as 别名]
    os.environ['PYTHONPATH'] = sys.path[0] + '/../..'

from pynlpl.formats.sonar import CorpusFiles, Corpus
from pynlpl.statistics import FrequencyList

sonardir = sys.argv[1]


freqlist = FrequencyList()
lemmapos_freqlist = FrequencyList()
poshead_freqlist = FrequencyList()
pos_freqlist = FrequencyList()

for i, doc in enumerate(Corpus(sonardir)):
    print >>sys.stderr, "#" + str(i) + " Processing " + doc.filename
    for word, id, pos, lemma in doc:
        freqlist.count(word)
        if lemma and pos:
            poshead = pos.split('(')[0]
            lemmapos_freqlist.count(lemma+'.'+poshead)
            poshead_freqlist.count(poshead)
            pos_freqlist.count(pos)

freqlist.save('sonarfreqlist.txt')
lemmapos_freqlist.save('sonarlemmaposfreqlist.txt')
poshead_freqlist.save('sonarposheadfreqlist.txt')
pos_freqlist.save('sonarposfreqlist.txt')

            
print unicode(freqlist).encode('utf-8')
开发者ID:Acidburn0zzz,项目名称:pynlpl,代码行数:32,代码来源:sonarfreqlist.py


注:本文中的pynlpl.statistics.FrequencyList.count方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。