当前位置: 首页>>代码示例>>Python>>正文


Python Preprocessor.preprocess方法代码示例

本文整理汇总了Python中Preprocessor.Preprocessor.preprocess方法的典型用法代码示例。如果您正苦于以下问题：Python Preprocessor.preprocess方法的具体用法？Python Preprocessor.preprocess怎么用？Python Preprocessor.preprocess使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Preprocessor.Preprocessor的用法示例。


在下文中一共展示了Preprocessor.preprocess方法的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: trainClassifier

# 需要导入模块: from Preprocessor import Preprocessor [as 别名]
# 或者: from Preprocessor.Preprocessor import preprocess [as 别名]
    def trainClassifier(self, trainLetter, progress, progLab, maxSets):

        # nacitanie a predspracovanie signalu
        signalLoader = SignalLoader(self.chanNum,self.files)
        prpr = Preprocessor(self.chanNum,[])
        signal,stimCode,phaseInSequence = signalLoader.loadSignal()
        self.signal = prpr.preprocess(240,1E-1,30E0,self.sf,signal,stimCode,phaseInSequence,0)
        self.stimulusCode = prpr.stimulusCode
        self.phaseInSequence = prpr.phaseInSequence
        self.targetLetters = sum(trainLetter,[])

        # najdenie prechodov medzi znakmi
        charEnds = self.findCharEnds()

        # rozdelenie dat do epoch
        em = EpochManager(self.signal,self.stimulusCode,self.phaseInSequence)
        isiList = em.createEpochs()

        # trening jednotlivych znakov
        for i in range(len(charEnds)):
            progress["value"] = i
            progLab["text"] = ("Trénujem znak: {}/{}").format(i+1, len(charEnds))
            print "Averaging character:",i,"\n"
            hi = charEnds[i]
            if i == 0:
                lo = 0
            else:
                lo = charEnds[i-1]

            rowColBinList = em.getAveragedEpochs(hi,lo,isiList,maxSets)
            finalDataArray = rowColBinList
            classMarks = self.prepairTargetArray(self.getCharIndexes(self.targetLetters[i]))

            if self.firsttrain == 1:
                self.cl.learn(finalDataArray,classMarks,0)
                self.firsttrain = 0
            else:
                self.cl.learn(finalDataArray,classMarks)
开发者ID:BergiSK,项目名称:Bakalarka,代码行数:40,代码来源:Processor.py

示例2: __init__

# 需要导入模块: from Preprocessor import Preprocessor [as 别名]
# 或者: from Preprocessor.Preprocessor import preprocess [as 别名]
class TopicModelHarness:
    def __init__(self, getTitle, getBody, getUrl):
        self.getTitle = getTitle
        self.getBody = getBody
        self.getUrl = getUrl

    def getRaw(self, path):
        raw = read_csv(path, sep='\t', na_values=['?']).fillna(-5)
        return raw

    def getColumns(self, raw):
        boilerplate = raw['boilerplate']
        urlid = raw['urlid']
        if 'label' in raw:
            labels = raw['label']
            return boilerplate, labels, urlid
        return boilerplate, urlid

    def getDocs(self, boilerplate):
        docs = []
        for row in boilerplate:
            rowObject = json.loads(row)
            doc = ''
            if 'title' in rowObject and rowObject['title'] and self.getTitle:
                doc += rowObject['title']
            if 'body' in rowObject and rowObject['body'] and self.getBody:
                doc += ' ' + rowObject['body']
            if 'url' in rowObject and rowObject['url'] and self.getUrl:
                doc += ' ' + rowObject['url']
            docs.append(doc)
        return docs

    def tag(self, str, tag):
        """Return ``str`` with a tagged copy placed before every kept word.

        The input is split on single spaces.  Words whose lower-cased form is
        in ``ENGLISH_STOP_WORDS`` are dropped; every other word ``w`` becomes
        ``"<tag>__w w "``.

        :param str: text to tag (the name shadows the builtin; kept because it
            is part of the method's signature).
        :param tag: prefix placed before ``__`` in each tagged copy.
        :return: the concatenation of all emitted pieces.
        """
        pieces = [
            tag + '__' + word + ' ' + word + ' '
            for word in str.split(' ')
            if word.lower() not in ENGLISH_STOP_WORDS
        ]
        return ''.join(pieces)

    def preprocessDocs(self, docs):
        preprocessed_docs = []
        for doc in docs:
            punctuation = [',','.', ';', '!', '?', ':']
            for p in punctuation:
                doc = doc.replace(p, ' ' + p + ' ')

            doc = doc.lower()
            preprocessed_docs.append(doc)

        return preprocessed_docs

    def expandVocab(self, docs):
        print 'expanding vocabulary...'
        freqCounts = self.countTokens(docs)

        tokenList = []
        freqCountList = []
        for token in freqCounts:
            tokenList.append(token)
            freqCountList.append(freqCounts[token])

        expTokenDf = DataFrame({'tokens': tokenList, 'freqCounts': freqCountList})
        expTokenDf = expTokenDf.sort('freqCounts', ascending=False)
        expandableTokensFiltered = set(expTokenDf['tokens'][2000:3000]).difference(ENGLISH_STOP_WORDS)
        batchSize = 10000
        print "%d filtered tokens chosen" % len(expandableTokensFiltered)
        print "Expandable tokens: "
        print expandableTokensFiltered
        newDocs = []
        for i in xrange(0,len(docs)):
            doc = docs[i]
            newDocSplit = doc.split()
            tokenList = doc.split(' ')
            start = 0
            newTokens = set()
            while start < len(tokenList):
                stop = start + batchSize
                tokens = set(tokenList[start:stop])
                start = start + batchSize/2
                tokensToExpand = tokens.intersection(expandableTokensFiltered)
                newTokens = newTokens.union(self.expandVocabFromSet(tokensToExpand))

            newDocSplit.extend(list(newTokens))
            newDoc = ''
            for token in newDocSplit:
                newDoc += ' ' + token + ' '
            newDocs.append(newDoc)

            if i % 500 == 0:
                print '\nprocessed %d docs' % i
                print '%d new tokens added to document' % len(newTokens)
                print 'new tokens:'
                print newTokens
                print len(tokens)

        return newDocs

    def expandVocabFromSet(self, tokensToExpand):
        expanded = set()
#.........这里部分代码省略.........
开发者ID:anthonygarvan,项目名称:evergreen,代码行数:103,代码来源:main.py

示例3: guessChars

# 需要导入模块: from Preprocessor import Preprocessor [as 别名]
# 或者: from Preprocessor.Preprocessor import preprocess [as 别名]
    def guessChars(self,subset,files,targetLetter,testProgress,progTestLabel,guessView,guessLab,maxSets):
        aktCharNum = 0
        totalChars = len(sum(targetLetter,[]))

        if self.chanNum != 64:
            files.sort()
            files = self.createTriplets(files)


        for m in range(len(files)):
            # nacitanie a predspracovanie signalu
            signalLoader = SignalLoader(self.chanNum,files[m])
            prpr = Preprocessor(self.chanNum,subset)
            signal, stimCode, phaseInSequence = signalLoader.loadSignal()
            self.signal = prpr.preprocess(240,1E-1,30E0,self.sf,signal,stimCode,phaseInSequence,1)
            self.stimulusCode = prpr.stimulusCode
            self.phaseInSequence = prpr.phaseInSequence
            if (len(targetLetter) > m):
                self.targetLetters = targetLetter[m]
            else:
                self.targetLetters = []
            print "Processing file:",m,"\n"

            # najdenie prechodov medzi znakmi
            charEnds = self.findCharEnds()

            # rozdelenie dat do epoch
            em = EpochManager(self.signal,self.stimulusCode,self.phaseInSequence)
            isiList = em.createEpochs()

            hit = 0
            # hadanie jednotlivych znakov
            for i in range(len(charEnds)):
                testProgress["value"] = aktCharNum
                progTestLabel["text"] = ("Hádam znak: {}/{}").format(aktCharNum+1, totalChars)
                aktCharNum +=1

                hi = charEnds[i]
                if i == 0:
                    lo = 0
                else:
                    lo = charEnds[i-1]

                rowColBinList = em.getAveragedEpochs(hi,lo,isiList,maxSets)
                finalDataArray = self.prepairSignalArray(self.sf.grandAveragingFilter(rowColBinList,subset,1))

                #pomocou klasifikatora
                char = self.cl.predictTarget(finalDataArray,self.cl.reduce(self.sf,self,subset))

                if len(self.targetLetters) > i:
                    if char == self.targetLetters[i]:
                        hit+=1
                        print "Succesfully guessed char:",char,"\n"
                    else:
                        print "Guessed char:",char,"\n"


                if i == 0:
                    text = "(" + char + ","
                elif i == len(charEnds) - 1:
                    text = char + ")"
                else:
                    text = char + ","

                guessView.configure(state='normal')
                guessView.insert(INSERT, text)
                guessView.configure(state='disabled')

            self.rate += (hit)*100/float(totalChars)
            print "\n Success rate= ",self.rate, "\n"
            guessLab["text"]=("Presnosť: {}").format(self.rate)

        return self.rate
开发者ID:BergiSK,项目名称:Bakalarka,代码行数:75,代码来源:Processor.py


注:本文中的Preprocessor.Preprocessor.preprocess方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。