This article collects typical usage examples of the arff.load method in Python. If you are wondering what arff.load does, how to call it, or what it looks like in practice, the hand-picked code examples here may help. You can also explore further usage examples of the arff module that this method belongs to.
Below are 13 code examples of arff.load, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
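Before the examples: all of them access the loaded result as a dict (data['attributes'], data['data'], arff.dumps(...)), which matches the liac-arff package. A minimal sketch of that API, assuming liac-arff and a hypothetical file iris.arff used only for illustration:

import arff  # liac-arff

# 'iris.arff' is a placeholder file name
with open('iris.arff') as fp:
    dataset = arff.load(fp)          # returns a plain dict

print(dataset['relation'])           # relation name from the @RELATION line
print(dataset['attributes'][:3])     # list of (name, type) pairs
print(dataset['data'][0])            # first row as a list of values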
Example 1: read_arff
# Required import: import arff [as alias]
# Or: from arff import load [as alias]
import numpy as np

def read_arff(file_path, misplaced_list):
    misplaced = False
    for item in misplaced_list:
        if item in file_path:
            misplaced = True

    file = arff.load(open(file_path))
    data_value = np.asarray(file['data'])
    attributes = file['attributes']

    X = data_value[:, 0:-2]
    if not misplaced:
        y = data_value[:, -1]
    else:
        y = data_value[:, -2]
    y[y == 'no'] = 0
    y[y == 'yes'] = 1
    y = y.astype('float').astype('int').ravel()

    if y.sum() > len(y):
        print(attributes)
        raise ValueError('wrong sum')

    return X, y, attributes
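A hypothetical call could look like this (the file path and the misplaced-label list are made up for illustration):

X, y, attributes = read_arff('data/cardio.arff', misplaced_list=['Annthyroid', 'Arrhythmia'])
print(X.shape, int(y.sum()))  # feature matrix and number of rows labelled 'yes'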
Example 2: gsCreation
# Required import: import arff [as alias]
# Or: from arff import load [as alias]
def gsCreation():
    # We load ARFF files containing the ratings
    print("Reading individual ratings...")
    rGoldIndiv = openingRatingIndividual()
    print("Computing inter-rater agreement on raw...")
    seq = []
    for i in range(v.nAn):
        seq.append(i)
    # We take the list of rater pair combinations
    combnk = combinListe(seq, 2)
    # We get the names of the files
    files = listFiles()
    # We compute the agreement between each pair of raters in this list
    ra = ratersAgreement(rGoldIndiv, combnk, files)
    # We compute the agreement of each rater
    aRa = raterAgreement(ra, combnk, files)
    #print aRa
    #print sum(aRa)
    print("Perform CCC centring...")
    cccCentring(ra, combnk, files, aRa, rGoldIndiv)
# End gsCreation
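The "CCC" in cccCentring refers to the Concordance Correlation Coefficient, a common inter-rater agreement measure. The project helpers used above (openingRatingIndividual, combinListe, ratersAgreement, raterAgreement and the v configuration module) are not shown on this page; the sketch below only illustrates how a CCC between two raters' rating sequences could be computed, and is not the project's actual implementation:

import numpy as np

def ccc(a, b):
    # Concordance Correlation Coefficient between two rating sequences
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    cov = np.mean((a - a.mean()) * (b - b.mean()))
    return 2 * cov / (a.var() + b.var() + (a.mean() - b.mean()) ** 2)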
Example 3: load_iot
# Required import: import arff [as alias]
# Or: from arff import load [as alias]
import os
from functools import reduce

import pandas as pd

def load_iot():
    """ Loads IoT data

    The sensor stream contains information (temperature, humidity, light, and sensor voltage) collected from 54 sensors
    deployed in the Intel Berkeley Research Lab. The whole stream contains consecutive readings recorded over a
    two-month period (one reading every 1-3 minutes). The sensor ID is used as the class label, so the learning task
    of the stream is to correctly identify the sensor ID (1 out of 54 sensors) purely based on the sensor data and the
    corresponding recording time.

    As the data stream flows over time, so do the concepts underlying the stream. For example, the lighting during
    working hours is generally stronger than at night, and the temperature of specific sensors (conference room)
    may regularly rise during meetings.

    Returns
    -------
    pandas DataFrame
    """
    dataset = arff.load(open(reduce(os.path.join, _IOT_PATH, _get_datapath())))
    columns = [i[0] for i in dataset['attributes']]
    return pd.DataFrame(dataset['data'], columns=columns)
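A quick usage sketch (purely illustrative; inspect df.columns to locate the sensor-id label column):

df = load_iot()
print(df.shape)
print(list(df.columns)[:5])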
Example 4: load_bci
# Required import: import arff [as alias]
# Or: from arff import load [as alias]
import os
from functools import reduce

import numpy as np

def load_bci():
    """ Loads BCI data

    Contains measurements from 64 EEG sensors on the scalp of a single participant.
    The purpose of the recording is to determine from the electrical brain activity when the participant is paying attention.

    Returns
    -------
    A tuple containing four numpy arrays
        train features
        train labels
        test features
        test labels
    """
    npzfile = np.load(reduce(os.path.join, _BCI_PATH, _get_datapath()))
    return npzfile['train_X'], npzfile['train_y'], npzfile['test_X'], npzfile['test_y']
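Usage is a straightforward unpacking of the four arrays:

train_X, train_y, test_X, test_y = load_bci()
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)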
Example 5: main
# Required import: import arff [as alias]
# Or: from arff import load [as alias]
import os
import pickle

import numpy as np
import tqdm

def main(opts):
    X = []  # accumulates per-file feature arrays when global stats are requested
    for ai, afile in tqdm.tqdm(enumerate(opts.arff_files), total=len(opts.arff_files)):
        with open(afile) as af:
            data = arff.load(af)
        attrs = [at[0] for at in data['attributes']]
        f0_idx = attrs.index('F0_sma')
        data = data['data']
        array = []
        for dpoint in data:
            # log-compress F0, keeping a large negative sentinel for unvoiced frames
            f0_val = dpoint[f0_idx]
            if f0_val > 0:
                dpoint[f0_idx] = np.log(f0_val)
            else:
                dpoint[f0_idx] = -1e10
            # ignore name, timestamp and class columns
            array.append(dpoint[2:-1])
        array = np.array(array, dtype=np.float32)
        lf0, _ = interpolation(array[:, -1], -1e10)
        array[:, -1] = lf0
        if opts.out_stats is not None:
            X.append(array)
        npfile = os.path.splitext(afile)[0]
        np.save(os.path.join(npfile), array.T)
    if opts.out_stats is not None:
        X = np.concatenate(X, axis=0)
        mn = np.mean(X, axis=0)
        sd = np.std(X, axis=0)
        with open(opts.out_stats, 'wb') as out_f:
            pickle.dump({'mean': mn, 'std': sd}, out_f)
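The interpolation helper called above is not shown on this page. Judging from the call site, it fills the unvoiced frames of the log-F0 track (marked with the -1e10 sentinel) and returns the filled contour plus a second value that the caller discards. A rough stand-in under that assumption, using simple linear interpolation:

import numpy as np

def interpolation(signal, unvoiced_value):
    # Illustrative sketch, not the original helper: linearly interpolate
    # over frames equal to `unvoiced_value`, return (contour, voiced_mask).
    signal = np.asarray(signal, dtype=np.float32)
    voiced = signal != unvoiced_value
    if not voiced.any():
        return signal, voiced
    idx = np.arange(len(signal))
    filled = np.interp(idx, idx[voiced], signal[voiced])
    return filled.astype(np.float32), voiced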
Example 6: create_dataframe
# Required import: import arff [as alias]
# Or: from arff import load [as alias]
def create_dataframe(data_path=None, records=None, features=None):
    # DataFrame here is the project's own wrapper class, not pandas.DataFrame
    if data_path:
        ds = DataFrame({'data_path': data_path})
        ds.load(features=features)
    else:
        ds = DataFrame({})
        ds.load_records(records, features=features)

    return ds
Example 7: cccCentring
# Required import: import arff [as alias]
# Or: from arff import load [as alias]
import numpy as np

def cccCentring(ra, combnk, files, aRa, rGoldIndiv):
    for i in range(len(v.eName)):
        for f, fname in enumerate(files[i][0]):
            meanByF = []
            wghRater = []
            csv = rGoldIndiv[v.eName[i]][f]
            # First we compute the mean of each rater for this file
            for a in range(v.nAn):
                # We get the rater's mean
                meanRatersF = np.nanmean(csv[:, a + 1])
                meanByF.append(meanRatersF)
                # We take the weight of the rater for this file
                wghRater.append(aRa[a][i][f])
            # Now we calculate the weighted mean over all raters
            pondMean = np.sum(np.multiply(meanByF, wghRater)) / np.sum(aRa[:, i, f])
            # We have the mean of all raters; we also need the total mean of the file
            meanF = np.nanmean(csv[:, 1:])
            # Now we will center each prediction according to the mean
            output = []
            # We prepare the ARFF file: we get the template
            data = arff.load(open(v.arffTempPath, 'rb'))
            for line in range(len(csv) - 1):
                meanLine = np.nanmean(csv[line + 1, 1:])
                newGs = meanLine - meanF + pondMean
                # We replace the values in the ARFF template
                data["data"][line][0] = fname.replace(".csv", "")
                data["data"][line][1] = round(csv[line + 1, 0], 2)
                data["data"][line][2] = round(newGs, 6)
            # We write the ARFF file to the Gold Standard folder
            f = open(v.agsc[i] + fname.replace(".csv", ".arff"), "w")
            f.write(arff.dumps(data))
    return None
# End cccCentring
Example 8: restaurObject
# Required import: import arff [as alias]
# Or: from arff import load [as alias]
import cPickle  # Python 2

def restaurObject(addr):
    f = open(addr, "rb")
    obj = cPickle.load(f)
    f.close()
    return obj
# End restaurObject

# Augment the tab to take context
Example 9: unimodalPredPrep
# Required import: import arff [as alias]
# Or: from arff import load [as alias]
import numpy as np

def unimodalPredPrep(wSize, wStep, nMod):
    feats = {}
    # We need the number of lines for a wStep of v.tsp
    trainLen = len(arff.load(open(v.descNorm[nMod] + "train_" + str(wSize) + "_" + str(v.tsp) + ".arff", "rb"))['data'])
    # We open the corresponding files
    for s in v.part:
        feats[s] = arff.load(open(v.descNorm[nMod] + s + "_" + str(wSize) + "_" + str(wStep) + ".arff", "rb"))
        # We set NaN values to 0
        feats[s] = arffNan(feats[s])
        # We transform it into an array
        feats[s] = np.array(feats[s]['data'])
        # We resample it to a wSize of v.tsp
        feats[s] = resamplingTab(feats[s], trainLen)
    return feats, trainLen
# End unimodalPredPrep
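arffNan is another project-specific helper that is not reproduced on this page. Based on the comment above ("set NaN values to 0"), a hypothetical stand-in could look like this (liac-arff also returns None for '?' missing values, which is handled the same way):

import math

def arffNan(dataset):
    # Illustrative sketch: replace missing/NaN cells in a loaded ARFF dict with 0
    dataset['data'] = [
        [0 if x is None or (isinstance(x, float) and math.isnan(x)) else x
         for x in row]
        for row in dataset['data']
    ]
    return dataset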
Example 10: concArff
# Required import: import arff [as alias]
# Or: from arff import load [as alias]
import os
import warnings

def concArff(sourceD, fNames, destinationD, fileName):
    try:
        fNames = sorted(fNames)
        warnings.filterwarnings('ignore', category=UnicodeWarning)
        arffs = {}
        long = 0
        b = 0
        # We verify that the file does not already exist
        if not os.path.isfile(destinationD + fileName):
            for i in range(len(fNames)):
                if os.path.isfile(sourceD + fNames[i]):
                    # We search for the corresponding descriptor with the parameters
                    if i == 0:
                        arffs = arff.load(open(sourceD + fNames[i], "rb"))
                        long = len(arffs['data'])
                    else:
                        d = arff.load(open(sourceD + fNames[i], "rb"))
                        # Pad or truncate so every file has the same number of rows
                        if len(d['data']) != long:
                            while len(d['data']) != long:
                                lastInd = len(d['data']) - 1
                                if len(d['data']) > long:
                                    del(d['data'][lastInd])
                                else:
                                    d['data'].append(d['data'][lastInd])
                        arffs['data'] += d['data']
                else:
                    b = 1
        else:
            b = 2
        if b == 0:
            f = open(destinationD + fileName, "w")
            arffs = removeColArff(arffs)
            f.write(arff.dumps(arffs))
        return b
    except KeyboardInterrupt:
        os.remove(destinationD + fileName)
        raise
# End concatenationArff: returns 0 if the file is written, 1 if one of the source files was missing, 2 if the file already exists

# Concatenation of gold standards per partition (test/dev/train)
Example 11: load_dataset_dump
# Required import: import arff [as alias]
# Or: from arff import load [as alias]
import bz2
import os
import pickle

def load_dataset_dump(filename):
    """Loads a compressed data set dump

    Parameters
    ----------
    filename : str
        path to dump file; if it does not end with .bz2, the .bz2 extension will be appended

    Returns
    -------
    X : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix, shape=(n_samples, n_features)
        input feature matrix
    y : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix of `{0, 1}`, shape=(n_samples, n_labels)
        binary indicator matrix with label assignments
    names of attributes : List[str]
        list of attribute names for `X` columns
    names of labels : List[str]
        list of label names for `y` columns
    """
    if not os.path.exists(filename):
        raise IOError("File {} does not exist, use load_dataset to download file".format(filename))

    if filename[-4:] != '.bz2':
        filename += ".bz2"

    with bz2.BZ2File(filename, "r") as file_handle:
        data = pickle.load(file_handle)

    return data['X'], data['y'], data['features'], data['labels']
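A hypothetical call (the dump file name is made up for illustration):

X, y, feature_names, label_names = load_dataset_dump('scene-train.scikitml.bz2')
print(X.shape, y.shape, len(feature_names), len(label_names))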
Example 12: read_ARFF2
# Required import: import arff [as alias]
# Or: from arff import load [as alias]
import pandas as pd

def read_ARFF2(file, label_col=None):
    data = arff.load(open(file, 'r'))
    data = pd.DataFrame(data['data'])
    X = data
    y = None
    X, y = _data_to_matrix(X, label_col)
    return X, y
Example 13: load
# Required import: import arff [as alias]
# Or: from arff import load [as alias]
def load(self, features=None, nrows=None):
    self.categoricals = {}
    self.transforms_log = [[], [], [], []]

    import csv
    from io import StringIO

    path = self.options['data_path']
    if isinstance(path, StringIO):
        path.seek(0)
        self.df = pd.read_csv(path, encoding='utf-8', escapechar="\\",
                              usecols=features, na_values=['?'], nrows=nrows)
        if self.options.get("targetFeature") in self.df.columns:
            self.dropna([self.options["targetFeature"]])
    else:
        if path.startswith("jdbc:"):
            import psycopg2
            from psycopg2.extensions import parse_dsn

            path = path.replace('sslfactory=org.postgresql.ssl.NonValidatingFactory&', '')
            ary = path.split('tablename')
            path = ary[0]
            tablename = ary[1]
            dataset_name = tablename

            self.dbconn_args = parse_dsn(path[5:])
            conn = psycopg2.connect(**self.dbconn_args)
            self.df = pd.read_sql("select * from %s" % tablename, con=conn)
        else:
            path, remote_path = self._check_remote_path()
            try:
                self.df = self.load_from_file(path, features=features, nrows=nrows)
            except:
                if remote_path:
                    logging.exception("Loading local file failed. Download it again...")
                    self.options['data_path'] = remote_path
                    path, remote_path = self._check_remote_path(force_download=True)
                    self.df = self.load_from_file(path, features=features, nrows=nrows)
                else:
                    raise

            self.dataset_name = os.path.basename(path)

        if self.options.get("targetFeature") in self.df.columns:
            self.dropna([self.options["targetFeature"]])

    return self