本文整理汇总了Python中Preprocessor.Preprocessor.preprocess方法的典型用法代码示例。如果您正苦于以下问题：Python Preprocessor.preprocess方法的具体用法？Python Preprocessor.preprocess怎么用？Python Preprocessor.preprocess使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Preprocessor.Preprocessor的用法示例。
在下文中一共展示了Preprocessor.preprocess方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: trainClassifier
# 需要导入模块: from Preprocessor import Preprocessor [as 别名]
# 或者: from Preprocessor.Preprocessor import preprocess [as 别名]
def trainClassifier(self, trainLetter, progress, progLab, maxSets):
    """Train ``self.cl`` on the recording in ``self.files``, one character at a time.

    Args:
        trainLetter: List of lists of target characters; flattened with
            ``sum(trainLetter, [])`` into ``self.targetLetters``.
        progress: Mapping-like progress widget; its ``"value"`` item is set
            to the index of the character currently being trained.
        progLab: Mapping-like label widget; its ``"text"`` item receives the
            "current/total" status string.
        maxSets: Forwarded unchanged to ``EpochManager.getAveragedEpochs``.

    Side effects:
        Overwrites ``self.signal``, ``self.stimulusCode``,
        ``self.phaseInSequence`` and ``self.targetLetters``, sets
        ``self.firsttrain`` to 0 once the first ``learn`` call was made, and
        prints one progress line per character.
    """
    # Load the raw recording and preprocess it.
    signalLoader = SignalLoader(self.chanNum, self.files)
    prpr = Preprocessor(self.chanNum, [])
    signal, stimCode, phaseInSequence = signalLoader.loadSignal()
    # NOTE(review): 240, 0.1 and 30 look like a sampling rate and band-pass
    # cut-offs -- confirm against Preprocessor.preprocess.
    self.signal = prpr.preprocess(240, 1E-1, 30E0, self.sf, signal, stimCode, phaseInSequence, 0)
    self.stimulusCode = prpr.stimulusCode
    self.phaseInSequence = prpr.phaseInSequence
    self.targetLetters = sum(trainLetter, [])

    # Find the transitions between characters.
    charEnds = self.findCharEnds()

    # Split the data into epochs.
    em = EpochManager(self.signal, self.stimulusCode, self.phaseInSequence)
    isiList = em.createEpochs()

    # Train on each character in turn.
    charCount = len(charEnds)
    lo = 0  # the first character starts at 0, later ones at the previous end
    for i, hi in enumerate(charEnds):
        progress["value"] = i
        progLab["text"] = ("Trénujem znak: {}/{}").format(i+1, charCount)
        # Single-argument print call: same output as the former Python 2
        # print statement, and valid syntax on Python 3 as well.
        print("Averaging character: %s \n" % (i,))
        averagedEpochs = em.getAveragedEpochs(hi, lo, isiList, maxSets)
        classMarks = self.prepairTargetArray(self.getCharIndexes(self.targetLetters[i]))
        if self.firsttrain == 1:
            # The very first call passes an extra 0 argument to learn().
            self.cl.learn(averagedEpochs, classMarks, 0)
            self.firsttrain = 0
        else:
            self.cl.learn(averagedEpochs, classMarks)
        lo = hi
示例2: __init__
# 需要导入模块: from Preprocessor import Preprocessor [as 别名]
# 或者: from Preprocessor.Preprocessor import preprocess [as 别名]
class TopicModelHarness:
def __init__(self, getTitle, getBody, getUrl):
self.getTitle = getTitle
self.getBody = getBody
self.getUrl = getUrl
def getRaw(self, path):
    """Load the tab-separated file at ``path`` with missing values replaced.

    Cells equal to ``?`` are read as missing, and every missing cell is
    then filled with ``-5``.

    Args:
        path: Location handed straight to ``read_csv``.

    Returns:
        The resulting data frame.
    """
    table = read_csv(path, sep='\t', na_values=['?'])
    return table.fillna(-5)
def getColumns(self, raw):
    """Pull the columns of interest out of ``raw``.

    Args:
        raw: Container indexable by column name that supports ``in``.

    Returns:
        ``(boilerplate, labels, urlid)`` when ``raw`` has a ``label``
        column, otherwise ``(boilerplate, urlid)``.
    """
    boilerplate, urlid = raw['boilerplate'], raw['urlid']
    if 'label' not in raw:
        return boilerplate, urlid
    return boilerplate, raw['label'], urlid
def getDocs(self, boilerplate):
    """Turn each JSON-encoded row into one plain-text document.

    A field is used only when it is present in the decoded row, has a
    truthy value, and the matching ``self.getTitle`` / ``self.getBody`` /
    ``self.getUrl`` switch is truthy. The title is added as-is; body and
    url are each preceded by a single space.

    Args:
        boilerplate: Iterable of JSON strings.

    Returns:
        List of document strings, one per row, in input order.
    """
    def build(row):
        fields = json.loads(row)
        parts = []
        if 'title' in fields and fields['title'] and self.getTitle:
            parts.append(fields['title'])
        if 'body' in fields and fields['body'] and self.getBody:
            parts.append(' ' + fields['body'])
        if 'url' in fields and fields['url'] and self.getUrl:
            parts.append(' ' + fields['url'])
        return ''.join(parts)

    return [build(row) for row in boilerplate]
def tag(self, str, tag):
    """Emit every non-stop-word twice: once prefixed with ``tag``, once plain.

    ``str`` is split on single spaces. Each word whose lower-cased form is
    not in ``ENGLISH_STOP_WORDS`` contributes ``"<tag>__<word> <word> "``
    to the result; stop words are dropped.

    Args:
        str: Text to tag.
        tag: Prefix joined to each word with a double underscore.

    Returns:
        The concatenated tagged text (empty string if nothing was kept).
    """
    pieces = [
        tag + '__' + word + ' ' + word + ' '
        for word in str.split(' ')
        if word.lower() not in ENGLISH_STOP_WORDS
    ]
    return ''.join(pieces)
def preprocessDocs(self, docs):
    """Space out punctuation and lower-case every document.

    Each of ``, . ; ! ? :`` is surrounded by one space on both sides, then
    the whole text is lower-cased.

    Args:
        docs: Iterable of document strings.

    Returns:
        New list with the normalized documents, in input order.
    """
    marks = (',', '.', ';', '!', '?', ':')

    def normalize(text):
        for mark in marks:
            text = text.replace(mark, ' ' + mark + ' ')
        return text.lower()

    return [normalize(text) for text in docs]
def expandVocab(self, docs):
    """Append expansion tokens to every document.

    Token frequencies come from ``self.countTokens(docs)``. The tokens at
    positions 2000-2999 of the descending frequency ranking, minus
    ``ENGLISH_STOP_WORDS``, form the "expandable" set. Each document is
    scanned in windows of 10000 space-separated tokens that advance by
    5000; the expandable tokens found in a window are passed to
    ``self.expandVocabFromSet`` and the results are collected per document.

    Args:
        docs: Sequence of document strings.

    Returns:
        List of new document strings: the whitespace-split original tokens
        followed by the collected expansion tokens, each wrapped in single
        spaces.

    Side effects:
        Prints progress and diagnostic information.
    """
    # All prints use the single-argument call form so the method parses on
    # both Python 2 and Python 3 with identical output.
    print('expanding vocabulary...')
    freqCounts = self.countTokens(docs)
    tokenList = list(freqCounts)
    freqCountList = [freqCounts[token] for token in tokenList]
    expTokenDf = DataFrame({'tokens': tokenList, 'freqCounts': freqCountList})
    # DataFrame.sort was removed from pandas; use sort_values where it
    # exists and fall back to the legacy method on old installations.
    if hasattr(expTokenDf, 'sort_values'):
        expTokenDf = expTokenDf.sort_values('freqCounts', ascending=False)
    else:
        expTokenDf = expTokenDf.sort('freqCounts', ascending=False)
    expandableTokensFiltered = set(expTokenDf['tokens'][2000:3000]).difference(ENGLISH_STOP_WORDS)
    batchSize = 10000
    # Floor division keeps the window step an int on Python 3 as well.
    step = batchSize // 2
    print("%d filtered tokens chosen" % len(expandableTokensFiltered))
    print("Expandable tokens: ")
    print(expandableTokensFiltered)
    newDocs = []
    for i, doc in enumerate(docs):
        newDocSplit = doc.split()
        # split(' ') never returns an empty list, so the window loop below
        # always runs at least once and `tokens` is always bound.
        tokenList = doc.split(' ')
        newTokens = set()
        for start in range(0, len(tokenList), step):
            tokens = set(tokenList[start:start + batchSize])
            tokensToExpand = tokens.intersection(expandableTokensFiltered)
            newTokens = newTokens.union(self.expandVocabFromSet(tokensToExpand))
        # NOTE(review): the source listing had lost its indentation; the
        # extension is assumed to happen once per document, after all
        # windows were processed -- confirm against the original file.
        newDocSplit.extend(newTokens)
        newDocs.append(''.join(' ' + token + ' ' for token in newDocSplit))
        if i % 500 == 0:
            print('\nprocessed %d docs' % i)
            print('%d new tokens added to document' % len(newTokens))
            print('new tokens:')
            print(newTokens)
            # Size of the last window examined for this document.
            print(len(tokens))
    return newDocs
def expandVocabFromSet(self, tokensToExpand):
expanded = set()
#.........这里部分代码省略.........
示例3: guessChars
# 需要导入模块: from Preprocessor import Preprocessor [as 别名]
# 或者: from Preprocessor.Preprocessor import preprocess [as 别名]
def guessChars(self,subset,files,targetLetter,testProgress,progTestLabel,guessView,guessLab,maxSets):
    """Classify every character in ``files`` and report the hit rate.

    Args:
        subset: Passed to ``Preprocessor``, ``self.sf.grandAveragingFilter``
            and ``self.cl.reduce``.
        files: List of recordings. When ``self.chanNum != 64`` it is sorted
            IN PLACE and then regrouped by ``self.createTriplets``.
        targetLetter: List of lists of expected characters, one inner list
            per entry of ``files``; may be shorter than ``files`` or empty.
        testProgress: Mapping-like widget; ``"value"`` receives the running
            character count.
        progTestLabel: Mapping-like widget; ``"text"`` receives the
            "current/total" status string.
        guessView: Widget exposing ``configure`` and ``insert``; guessed
            characters are appended to it.
        guessLab: Mapping-like widget; ``"text"`` receives the accuracy.
        maxSets: Forwarded unchanged to ``EpochManager.getAveragedEpochs``.

    Returns:
        ``self.rate`` after adding, for each file, the percentage of
        ``totalChars`` that was guessed correctly.

    Side effects:
        Overwrites ``self.signal``, ``self.stimulusCode``,
        ``self.phaseInSequence`` and ``self.targetLetters``; prints progress.
    """
    aktCharNum = 0
    totalChars = len(sum(targetLetter, []))
    if self.chanNum != 64:
        files.sort()
        files = self.createTriplets(files)
    for m, recording in enumerate(files):
        # Load the raw recording and preprocess it.
        signalLoader = SignalLoader(self.chanNum, recording)
        prpr = Preprocessor(self.chanNum, subset)
        signal, stimCode, phaseInSequence = signalLoader.loadSignal()
        self.signal = prpr.preprocess(240, 1E-1, 30E0, self.sf, signal, stimCode, phaseInSequence, 1)
        self.stimulusCode = prpr.stimulusCode
        self.phaseInSequence = prpr.phaseInSequence
        # Files without a matching target list are classified unscored.
        self.targetLetters = targetLetter[m] if len(targetLetter) > m else []
        # Single-argument print calls: same output as the former Python 2
        # print statements, and valid syntax on Python 3 as well.
        print("Processing file: %s \n" % (m,))

        # Find the transitions between characters.
        charEnds = self.findCharEnds()

        # Split the data into epochs.
        em = EpochManager(self.signal, self.stimulusCode, self.phaseInSequence)
        isiList = em.createEpochs()
        hit = 0

        # Guess each character in turn.
        lastIndex = len(charEnds) - 1
        lo = 0  # the first character starts at 0, later ones at the previous end
        for i, hi in enumerate(charEnds):
            testProgress["value"] = aktCharNum
            progTestLabel["text"] = ("Hádam znak: {}/{}").format(aktCharNum+1, totalChars)
            aktCharNum += 1
            rowColBinList = em.getAveragedEpochs(hi, lo, isiList, maxSets)
            lo = hi
            finalDataArray = self.prepairSignalArray(self.sf.grandAveragingFilter(rowColBinList, subset, 1))
            # Ask the classifier for the character.
            char = self.cl.predictTarget(finalDataArray, self.cl.reduce(self.sf, self, subset))
            # NOTE(review): the source listing had lost its indentation; the
            # else branch is assumed to belong to the inner comparison.
            if len(self.targetLetters) > i:
                if char == self.targetLetters[i]:
                    hit += 1
                    print("Succesfully guessed char: %s \n" % (char,))
                else:
                    print("Guessed char: %s \n" % (char,))
            # Open the group on the first character and close it on the
            # last; a file with a single character now yields "(c)" instead
            # of an unterminated "(c,".
            opening = "(" if i == 0 else ""
            closing = ")" if i == lastIndex else ","
            text = opening + char + closing
            guessView.configure(state='normal')
            guessView.insert(INSERT, text)
            guessView.configure(state='disabled')
        # NOTE(review): rate update and reporting are assumed to run once per
        # file, since `hit` is reset per file -- confirm against the original.
        # With no target letters there is nothing to score; skipping the
        # update avoids dividing by zero.
        if totalChars:
            self.rate += hit*100/float(totalChars)
        print("\n Success rate=  %s \n" % (self.rate,))
        guessLab["text"] = ("Presnosť: {}").format(self.rate)
    return self.rate