This article collects typical usage examples of the read_file method of Python's pywebhdfs.webhdfs.PyWebHdfsClient. If you have been wondering what PyWebHdfsClient.read_file does, how to call it, or what it looks like in real code, the curated examples below should help. You can also read further about the containing class, pywebhdfs.webhdfs.PyWebHdfsClient.
The following presents 10 code examples of PyWebHdfsClient.read_file, ordered by popularity.
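Before the examples, here is a minimal sketch of a typical read_file call, included for orientation only; the host, port, user name, and file path are illustrative placeholders, not values taken from any example below.
from pywebhdfs.webhdfs import PyWebHdfsClient

# Hypothetical connection details; substitute your own namenode settings.
hdfs = PyWebHdfsClient(host='localhost', port='50070', user_name='hdfs')

# read_file issues a WebHDFS OPEN request and returns the file's contents.
# Optional WebHDFS parameters such as offset, length, and buffersize can be
# passed as keyword arguments, as several of the examples below demonstrate.
data = hdfs.read_file('user/hdfs/example.txt')
print(data)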
Example 1: WhenTestingOpenOperation
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
import unittest

import httplib

from mock import MagicMock, patch

from pywebhdfs import errors
from pywebhdfs.webhdfs import PyWebHdfsClient


class WhenTestingOpenOperation(unittest.TestCase):

    def setUp(self):
        self.host = 'hostname'
        self.port = '00000'
        self.user_name = 'username'
        self.webhdfs = PyWebHdfsClient(host=self.host, port=self.port,
                                       user_name=self.user_name)
        self.requests = MagicMock()
        self.path = 'user/hdfs'
        self.file_data = '010101'
        self.response = MagicMock()
        self.response.text = self.file_data

    def test_read_throws_exception_for_not_ok(self):
        # a non-OK HTTP status should surface as PyWebHdfsException
        self.response.status_code = httplib.BAD_REQUEST
        self.requests.get.return_value = self.response
        with patch('pywebhdfs.webhdfs.requests', self.requests):
            with self.assertRaises(errors.PyWebHdfsException):
                self.webhdfs.read_file(self.path)

    def test_read_returns_file(self):
        # on HTTP 200 the response body is returned as the file data
        self.response.status_code = httplib.OK
        self.requests.get.return_value = self.response
        with patch('pywebhdfs.webhdfs.requests', self.requests):
            result = self.webhdfs.read_file(self.path)
            self.assertEqual(result, self.file_data)
Example 2: run
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
    # Method of a worker (e.g. threading.Thread) subclass; assumes
    # module-level imports of json, cPickle, struct, cStringIO, and
    # PIL's Image (Python 2).
    def run(self):
        if "agg" in self.arg:
            # read a pickled file into memory to stream later
            with open(self.path, "rb") as f:
                self.data_holder['data'] = json.dumps(cPickle.load(f))
            # indicate that in-memory reading is finished for this data
            self.data_holder["indicator"] = 'ready'
        elif "raw" in self.arg:
            from pywebhdfs.webhdfs import PyWebHdfsClient
            hdfs = PyWebHdfsClient(host='cshadoop.boisestate.edu',
                                   port='50070', user_name='uacharya')
            file_path = ('user/uacharya/flow/' + str(self.arg['d']) +
                         '/node_' + str(self.arg['n']) + '/output.csv')
            # read the CSV file into memory
            self.data_holder['data'] = hdfs.read_file(file_path,
                                                      buffersize=4096)
            self.data_holder["indicator"] = 'ready'
        elif "bitmap" in self.arg:
            # put the line data into an object to stream
            with open(self.path + "/data.json", "rb") as f:
                self.data_holder['data'] = json.dumps(cPickle.load(f))
            # skip loading images into memory when there are none
            if self.data_holder['data'] == '""':
                self.data_holder['frames'] = (0, [])
                self.data_holder["indicator"] = 'ready'
                return
            content_length = 0  # total byte length of all frames to stream
            PNGS = []  # list holding the PNG frame data in memory
            # read all the images into memory to stream
            for x in xrange(1, 31):
                buf_string = cStringIO.StringIO()
                Image.open(self.path + "/imgs/" + str(x) + ".png").save(
                    buf_string, format="PNG", quality=100)
                # each frame is prefixed with its length as a 4-byte
                # big-endian unsigned int, hence the +4 per frame
                content_length += buf_string.tell() + 4
                PNGS.append(struct.pack('>I', buf_string.tell()) +
                            buf_string.getvalue())
                buf_string.close()
            self.data_holder['frames'] = (content_length, PNGS)
            # indicate that in-memory reading is finished for this data
            self.data_holder["indicator"] = 'ready'
        else:
            raise InvalidFormatError(
                "the type of format is not available to read in memory")
Example 3: load
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
def load(name, chains=None, model=None, host='localhost', port='50070',
         user_name=None):
    '''
    Load text database.

    Parameters
    ----------
    name : str
        Path to root directory in HDFS for text database without a leading '/'
    chains : list
        Chains to load. If None, all chains are loaded
    model : Model
        If None, the model is taken from the 'with' context
    host : str
        The IP address or hostname of the HDFS namenode. By default,
        it is 'localhost'
    port : str
        The port number for WebHDFS on the namenode. By default, it
        is '50070'
    user_name : str
        WebHDFS user_name used for authentication. By default, it is
        None

    Returns
    -------
    ndarray.Trace instance
    '''
    hdfs = PyWebHdfsClient(host=host, port=port, user_name=user_name)
    chain_dirs = _get_chain_dirs(name, hdfs)
    if chains is None:
        chains = list(chain_dirs.keys())
    traces = []
    for chain in chains:
        chain_dir = chain_dirs[chain]
        dir_path = os.path.join(name, chain_dir)
        shape_file = os.path.join(dir_path, 'shapes.json')
        shapes = json.load(StringIO.StringIO(hdfs.read_file(shape_file)))
        samples = {}
        for varname, shape in shapes.items():
            var_file = os.path.join(dir_path, varname + '.txt')
            samples[varname] = np.loadtxt(
                StringIO.StringIO(str(hdfs.read_file(var_file)))).reshape(shape)
        trace = NDArray(model=model)
        trace.samples = samples
        trace.chain = chain
        traces.append(trace)
    return base.MultiTrace(traces)
Example 4: WhenTestingOpenOperation
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
import types
import unittest

from mock import MagicMock, patch
from six.moves import http_client

from pywebhdfs import errors
from pywebhdfs.webhdfs import PyWebHdfsClient


class WhenTestingOpenOperation(unittest.TestCase):

    def setUp(self):
        self.host = 'hostname'
        self.port = '00000'
        self.user_name = 'username'
        self.webhdfs = PyWebHdfsClient(host=self.host, port=self.port,
                                       user_name=self.user_name)
        self.requests = MagicMock()
        self.path = 'user/hdfs'
        self.file_data = u'010101'
        self.response = MagicMock()
        self.response.content = self.file_data

    def test_read_throws_exception_for_not_ok(self):
        # a non-OK HTTP status should surface as PyWebHdfsException
        self.response.status_code = http_client.BAD_REQUEST
        self.requests.return_value = self.response
        with patch('requests.sessions.Session.get', self.requests):
            with self.assertRaises(errors.PyWebHdfsException):
                self.webhdfs.read_file(self.path)

    def test_read_returns_file(self):
        # on HTTP 200 the response body is returned as the file data
        self.response.status_code = http_client.OK
        self.requests.return_value = self.response
        with patch('requests.sessions.Session.get', self.requests):
            result = self.webhdfs.read_file(self.path)
            self.assertEqual(result, self.file_data)

    def test_stream_returns_generator(self):
        # stream_file yields chunks lazily rather than returning bytes
        self.response.status_code = http_client.OK
        self.requests.return_value = self.response
        with patch('requests.sessions.Session.get', self.requests):
            result = self.webhdfs.stream_file(self.path)
            self.assertIsInstance(result, types.GeneratorType)
Example 5: __init__
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
#......... part of the code is omitted here .........
                {
                    'meta_data': {'version': version}
                }
            )
            if status != 200:
                if self.debug:
                    print "Error inserting: version"
        except:
            if self.debug:
                print "Upload failed."
            return False
        if self.debug:
            print "[Uploaded]", file, "version:", version
        return True

    def download(self, file, version=None, download_dir=''):
        ''' Retrieve (download) a file from HDFS, saving it as a new file
        named v[version].[file] (for example, v1.mytext.txt). You can specify
        the directory for the downloaded file, as well as the file's version;
        if no version is given, the latest version is downloaded.

        :param : file - file's name
        :param : version - file's version (default: latest version)
        :param : download_dir - download directory (default: '' or current
                 directory; NOTE: it must end with '/' - for example,
                 '../download/')
        :return: True if success otherwise False.
        '''
        if not version:
            version = self.get_lastest_version(file)
        key = ''.join(['v', str(version), '.', file])
        path = ''.join([self.hdfs_path, key])
        downloaded_file = ''.join([download_dir, key])
        try:
            f = open(downloaded_file, 'w')
            f.write(self.hdfs.read_file(path))
            f.close()
        except:
            if self.debug:
                print "Cannot download a file:", file
            return False
        if self.debug:
            print "[Downloaded]", key
        return True

    def update(self, file, version=None):
        ''' Update a file on HDFS together with its record in HBase, by
        overwriting the file on HDFS and inserting new data into HBase. You
        can specify a file's version in order to update that version.

        :param : file - file's name
        :param : version - file's version
        :return: True if success otherwise False.
        '''
        if not version:
            version = self.get_lastest_version(file)
        key = ''.join(['v', str(version), '.', file])
        path = ''.join([self.hdfs_path, key])
        # Read the file
        try:
            f = open(file, 'r')
            file_content = f.read()
            f.close()
        except:
            print "Cannot read file:", file
        # Try to upload the file.
        try:
Example 6: print
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
# (fragment of a longer example script: hdfs, example_file, example_data,
# example_dir, and rename_dir are defined in the omitted earlier part)
print(file_checksum)

# append to the file created in the previous step
print('appending to file at: {0}\n'.format(example_file))
hdfs.append_file(example_file, example_data)
file_status = hdfs.get_file_dir_status(example_file)
print(file_status)

# the checksum reflects the file changes
file_checksum = hdfs.get_file_checksum(example_file)
print(file_checksum)

# read in the data for the file
print('reading data from file at: {0}\n'.format(example_file))
file_data = hdfs.read_file(example_file)
print(file_data)

# rename the example_dir
print('renaming directory from {0} to {1}\n'.format(example_dir, rename_dir))
hdfs.rename_file_dir(example_dir, '/{0}'.format(rename_dir))

# list the contents of the new directory
listdir_stats = hdfs.list_dir(rename_dir)
print(listdir_stats)

example_file = '{dir}/example.txt'.format(dir=rename_dir)

# delete the example file
print('deleting example file at: {0}'.format(example_file))
hdfs.delete_file_dir(example_file)
Example 7: HDFS
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
import logging
import os
import StringIO

from pywebhdfs.webhdfs import PyWebHdfsClient

# canonicalize() is a path-normalizing helper defined elsewhere in the
# original module.


class HDFS(object):
    def __init__(self, host, port, user):
        self._hdfs = PyWebHdfsClient(
            host=host, port=port, user_name=user, timeout=None)
        logging.debug('webhdfs = %s@%s:%s', user, host, port)

    def recursive_copy(self, local_path, remote_path, exclude=None):
        if exclude is None:
            exclude = []
        c_path = canonicalize(remote_path)
        logging.debug('making %s', c_path)
        self._hdfs.make_dir(c_path)
        fs_g = os.walk(local_path)
        for dpath, dnames, fnames in fs_g:
            _, relative_path = dpath.split(local_path)
            for dname in dnames:
                if dname not in exclude:
                    c_path = canonicalize(
                        '%s/%s/%s' % (remote_path, relative_path, dname))
                    logging.debug('making %s', c_path)
                    self._hdfs.make_dir(c_path)
            for fname in fnames:
                if fname not in exclude:
                    data = file(
                        canonicalize(
                            '%s/%s/%s' % (local_path, relative_path, fname)),
                        'rb')
                    c_path = canonicalize(
                        '%s/%s/%s' % (remote_path, relative_path, fname))
                    logging.debug('creating %s', c_path)
                    self._hdfs.create_file(c_path, data, overwrite=True)
                    data.close()

    def make_dir(self, path):
        logging.debug('make_dir: %s', path)
        self._hdfs.make_dir(canonicalize(path))

    def create_file(self, data, remote_file_path):
        logging.debug('create_file: %s', remote_file_path)
        sio = StringIO.StringIO(data)
        self._hdfs.create_file(
            canonicalize(remote_file_path), sio, overwrite=True)

    def append_file(self, data, remote_file_path):
        logging.debug('append to: %s', remote_file_path)
        self._hdfs.append_file(canonicalize(remote_file_path), data)

    def stream_file_to_disk(self, remote_file_path, local_file_path):
        # pull the remote file down in 10 MB chunks; a short read marks EOF
        chunk_size = 10 * 1024 * 1024
        offset = 0
        with open(local_file_path, 'wb') as dest_file:
            data = self._hdfs.read_file(
                canonicalize(remote_file_path), offset=offset,
                length=chunk_size)
            while True:
                dest_file.write(data)
                if len(data) < chunk_size:
                    break
                offset += chunk_size
                data = self._hdfs.read_file(
                    canonicalize(remote_file_path), offset=offset,
                    length=chunk_size)

    def read_file(self, remote_file_path):
        data = self._hdfs.read_file(canonicalize(remote_file_path))
        return data

    def remove(self, path, recursive=False):
        logging.debug('remove: %s', path)
        self._hdfs.delete_file_dir(canonicalize(path), recursive)
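For orientation, here is a minimal, hypothetical usage sketch of the HDFS wrapper above; the connection details and paths are made-up placeholders, not values from the original code.
# Hypothetical usage of the HDFS wrapper (placeholder host, user, and paths).
fs = HDFS(host='namenode.example.com', port='50070', user='hdfs')
fs.make_dir('/tmp/demo')
fs.create_file('hello world\n', '/tmp/demo/hello.txt')
print fs.read_file('/tmp/demo/hello.txt')
fs.stream_file_to_disk('/tmp/demo/hello.txt', 'hello_local.txt')
fs.remove('/tmp/demo', recursive=True)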
Example 8: PyWebHdfsClient
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
from pywebhdfs.webhdfs import PyWebHdfsClient

hdfs = PyWebHdfsClient(host='localhost', port='50070', user_name='vagrant')
my_file = 'user/vagrant/hdfs-test/data.dat'

print 'Status of file: ', my_file
status = hdfs.get_file_dir_status(my_file)
print status

# read 500 bytes of the file, starting at byte offset 500
print 'Second 500 bytes of file: ', my_file
data = hdfs.read_file(my_file, offset=500, length=500)
print data
Example 9: PyWebHdfsClient
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
# -*- coding: UTF8 -*-
from pywebhdfs.webhdfs import PyWebHdfsClient

my_file = '/di/logs/raw_logs/rings.event/day=20160429/2016042900rings.event.1461859205239'
hdfs = PyWebHdfsClient(host='10.160.241.61', port='14000', user_name='hdfs')
hdfs.read_file(my_file)
Example 10: range
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
import pickle
import re
import socket
import time
import urllib2

import happybase
from pywebhdfs.webhdfs import PyWebHdfsClient

THRIFTNODE = 'data2'
client = PyWebHdfsClient(host='namenode', port='50070', user_name='root')
conn = happybase.Connection(THRIFTNODE)
crawls = conn.table('crawls')
MAXLOCALLINKCOUNT = 30
timeout = 5
socket.setdefaulttimeout(timeout)
DATESTRING = str(time.strftime('%Y%m%d'))
ANET = 187

for BNET in range(5, 10):
    SCANSITESFILE = str(ANET) + '-' + str(BNET) + '-p80.log'
    FNAME = 'user/root/scans/' + str(ANET) + '/' + SCANSITESFILE
    # copy the scan list for this address block out of HDFS to a local file
    SSFP = open(SCANSITESFILE, 'w')
    SSFP.write(client.read_file(FNAME))
    SSFP.close()
    try:
        ifp = open(SCANSITESFILE, 'r')
    except:
        continue
    for line in ifp:
        line = line.strip().split()
        url = 'http://' + str(line[1]) + '/'
        req = urllib2.Request(url)
        html = ''
        try:
            html = urllib2.urlopen(req)
        except:
            print ' url open exception on ' + str(url)
            continue