This article collects typical usage examples of the read_file method of Python's pywebhdfs.webhdfs.PyWebHdfsClient. If you have been wondering what PyWebHdfsClient.read_file does, how to call it, or what it looks like in real code, the curated examples below should help. You can also read further about the containing class, pywebhdfs.webhdfs.PyWebHdfsClient.
The following presents 10 code examples of PyWebHdfsClient.read_file, ordered by popularity.
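Before the examples, here is a minimal sketch of a typical read_file call, included for orientation only; the host, port, user name, and file path are illustrative placeholders, not values taken from any example below.
from pywebhdfs.webhdfs import PyWebHdfsClient

# Hypothetical connection details; substitute your own namenode settings.
hdfs = PyWebHdfsClient(host='localhost', port='50070', user_name='hdfs')

# read_file issues a WebHDFS OPEN request and returns the file's contents.
# Optional WebHDFS parameters such as offset, length, and buffersize can be
# passed as keyword arguments, as several of the examples below demonstrate.
data = hdfs.read_file('user/hdfs/example.txt')
print(data)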
Example 1: WhenTestingOpenOperation
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
import unittest

import httplib

from mock import MagicMock, patch

from pywebhdfs import errors
from pywebhdfs.webhdfs import PyWebHdfsClient


class WhenTestingOpenOperation(unittest.TestCase):

    def setUp(self):
        self.host = 'hostname'
        self.port = '00000'
        self.user_name = 'username'
        self.webhdfs = PyWebHdfsClient(host=self.host, port=self.port,
                                       user_name=self.user_name)
        self.requests = MagicMock()
        self.path = 'user/hdfs'
        self.file_data = '010101'
        self.response = MagicMock()
        self.response.text = self.file_data

    def test_read_throws_exception_for_not_ok(self):
        # a non-OK HTTP status should surface as PyWebHdfsException
        self.response.status_code = httplib.BAD_REQUEST
        self.requests.get.return_value = self.response
        with patch('pywebhdfs.webhdfs.requests', self.requests):
            with self.assertRaises(errors.PyWebHdfsException):
                self.webhdfs.read_file(self.path)

    def test_read_returns_file(self):
        # on HTTP 200 the response body is returned as the file data
        self.response.status_code = httplib.OK
        self.requests.get.return_value = self.response
        with patch('pywebhdfs.webhdfs.requests', self.requests):
            result = self.webhdfs.read_file(self.path)
            self.assertEqual(result, self.file_data)
Example 2: run
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
    # Method of a worker (e.g. threading.Thread) subclass; assumes
    # module-level imports of json, cPickle, struct, cStringIO, and
    # PIL's Image (Python 2).
    def run(self):
        if "agg" in self.arg:
            # read a pickled file into memory to stream later
            with open(self.path, "rb") as f:
                self.data_holder['data'] = json.dumps(cPickle.load(f))
            # indicate that in-memory reading is finished for this data
            self.data_holder["indicator"] = 'ready'
        elif "raw" in self.arg:
            from pywebhdfs.webhdfs import PyWebHdfsClient
            hdfs = PyWebHdfsClient(host='cshadoop.boisestate.edu',
                                   port='50070', user_name='uacharya')
            file_path = ('user/uacharya/flow/' + str(self.arg['d']) +
                         '/node_' + str(self.arg['n']) + '/output.csv')
            # read the CSV file into memory
            self.data_holder['data'] = hdfs.read_file(file_path,
                                                      buffersize=4096)
            self.data_holder["indicator"] = 'ready'
        elif "bitmap" in self.arg:
            # put the line data into an object to stream
            with open(self.path + "/data.json", "rb") as f:
                self.data_holder['data'] = json.dumps(cPickle.load(f))
            # skip loading images into memory when there are none
            if self.data_holder['data'] == '""':
                self.data_holder['frames'] = (0, [])
                self.data_holder["indicator"] = 'ready'
                return
            content_length = 0  # total byte length of all frames to stream
            PNGS = []  # list holding the PNG frame data in memory
            # read all the images into memory to stream
            for x in xrange(1, 31):
                buf_string = cStringIO.StringIO()
                Image.open(self.path + "/imgs/" + str(x) + ".png").save(
                    buf_string, format="PNG", quality=100)
                # each frame is prefixed with its length as a 4-byte
                # big-endian unsigned int, hence the +4 per frame
                content_length += buf_string.tell() + 4
                PNGS.append(struct.pack('>I', buf_string.tell()) +
                            buf_string.getvalue())
                buf_string.close()
            self.data_holder['frames'] = (content_length, PNGS)
            # indicate that in-memory reading is finished for this data
            self.data_holder["indicator"] = 'ready'
        else:
            raise InvalidFormatError(
                "the type of format is not available to read in memory")
Example 3: load
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
def load(name, chains=None, model=None, host='localhost', port='50070',
         user_name=None):
    '''
    Load text database.

    Parameters
    ----------
    name : str
        Path to root directory in HDFS for text database without a leading '/'
    chains : list
        Chains to load. If None, all chains are loaded
    model : Model
        If None, the model is taken from the 'with' context
    host : str
        The IP address or hostname of the HDFS namenode. By default,
        it is 'localhost'
    port : str
        The port number for WebHDFS on the namenode. By default, it
        is '50070'
    user_name : str
        WebHDFS user_name used for authentication. By default, it is
        None

    Returns
    -------
    ndarray.Trace instance
    '''
    hdfs = PyWebHdfsClient(host=host, port=port, user_name=user_name)
    chain_dirs = _get_chain_dirs(name, hdfs)
    if chains is None:
        chains = list(chain_dirs.keys())
    traces = []
    for chain in chains:
        chain_dir = chain_dirs[chain]
        dir_path = os.path.join(name, chain_dir)
        shape_file = os.path.join(dir_path, 'shapes.json')
        shapes = json.load(StringIO.StringIO(hdfs.read_file(shape_file)))
        samples = {}
        for varname, shape in shapes.items():
            var_file = os.path.join(dir_path, varname + '.txt')
            samples[varname] = np.loadtxt(
                StringIO.StringIO(str(hdfs.read_file(var_file)))).reshape(shape)
        trace = NDArray(model=model)
        trace.samples = samples
        trace.chain = chain
        traces.append(trace)
    return base.MultiTrace(traces)
Example 4: WhenTestingOpenOperation
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
import types
import unittest

from mock import MagicMock, patch
from six.moves import http_client

from pywebhdfs import errors
from pywebhdfs.webhdfs import PyWebHdfsClient


class WhenTestingOpenOperation(unittest.TestCase):

    def setUp(self):
        self.host = 'hostname'
        self.port = '00000'
        self.user_name = 'username'
        self.webhdfs = PyWebHdfsClient(host=self.host, port=self.port,
                                       user_name=self.user_name)
        self.requests = MagicMock()
        self.path = 'user/hdfs'
        self.file_data = u'010101'
        self.response = MagicMock()
        self.response.content = self.file_data

    def test_read_throws_exception_for_not_ok(self):
        # a non-OK HTTP status should surface as PyWebHdfsException
        self.response.status_code = http_client.BAD_REQUEST
        self.requests.return_value = self.response
        with patch('requests.sessions.Session.get', self.requests):
            with self.assertRaises(errors.PyWebHdfsException):
                self.webhdfs.read_file(self.path)

    def test_read_returns_file(self):
        # on HTTP 200 the response body is returned as the file data
        self.response.status_code = http_client.OK
        self.requests.return_value = self.response
        with patch('requests.sessions.Session.get', self.requests):
            result = self.webhdfs.read_file(self.path)
            self.assertEqual(result, self.file_data)

    def test_stream_returns_generator(self):
        # stream_file yields chunks lazily rather than returning bytes
        self.response.status_code = http_client.OK
        self.requests.return_value = self.response
        with patch('requests.sessions.Session.get', self.requests):
            result = self.webhdfs.stream_file(self.path)
            self.assertIsInstance(result, types.GeneratorType)
Example 5: __init__
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
#......... part of the code is omitted here .........
                {
                    'meta_data': {'version': version}
                }
            )
            if status != 200:
                if self.debug:
                    print "Error inserting: version"
        except:
            if self.debug:
                print "Upload failed."
            return False
        if self.debug:
            print "[Uploaded]", file, "version:", version
        return True

    def download(self, file, version=None, download_dir=''):
        ''' Retrieve (download) a file from HDFS, saving it as a new file
        named v[version].[file] (for example, v1.mytext.txt). You can specify
        the directory for the downloaded file, as well as the file's version;
        if no version is given, the latest version is downloaded.

        :param : file - file's name
        :param : version - file's version (default: latest version)
        :param : download_dir - download directory (default: '' or current
                 directory; NOTE: it must end with '/' - for example,
                 '../download/')
        :return: True if success otherwise False.
        '''
        if not version:
            version = self.get_lastest_version(file)
        key = ''.join(['v', str(version), '.', file])
        path = ''.join([self.hdfs_path, key])
        downloaded_file = ''.join([download_dir, key])
        try:
            f = open(downloaded_file, 'w')
            f.write(self.hdfs.read_file(path))
            f.close()
        except:
            if self.debug:
                print "Cannot download a file:", file
            return False
        if self.debug:
            print "[Downloaded]", key
        return True

    def update(self, file, version=None):
        ''' Update a file on HDFS together with its record in HBase, by
        overwriting the file on HDFS and inserting new data into HBase. You
        can specify a file's version in order to update that version.

        :param : file - file's name
        :param : version - file's version
        :return: True if success otherwise False.
        '''
        if not version:
            version = self.get_lastest_version(file)
        key = ''.join(['v', str(version), '.', file])
        path = ''.join([self.hdfs_path, key])
        # Read the file
        try:
            f = open(file, 'r')
            file_content = f.read()
            f.close()
        except:
            print "Cannot read file:", file
        # Try to upload the file.
        try:
Example 6: print
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
# (fragment of a longer example script: hdfs, example_file, example_data,
# example_dir, and rename_dir are defined in the omitted earlier part)
print(file_checksum)

# append to the file created in the previous step
print('appending to file at: {0}\n'.format(example_file))
hdfs.append_file(example_file, example_data)
file_status = hdfs.get_file_dir_status(example_file)
print(file_status)

# the checksum reflects the file changes
file_checksum = hdfs.get_file_checksum(example_file)
print(file_checksum)

# read in the data for the file
print('reading data from file at: {0}\n'.format(example_file))
file_data = hdfs.read_file(example_file)
print(file_data)

# rename the example_dir
print('renaming directory from {0} to {1}\n'.format(example_dir, rename_dir))
hdfs.rename_file_dir(example_dir, '/{0}'.format(rename_dir))

# list the contents of the new directory
listdir_stats = hdfs.list_dir(rename_dir)
print(listdir_stats)

example_file = '{dir}/example.txt'.format(dir=rename_dir)

# delete the example file
print('deleting example file at: {0}'.format(example_file))
hdfs.delete_file_dir(example_file)
Example 7: HDFS
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
import logging
import os
import StringIO

from pywebhdfs.webhdfs import PyWebHdfsClient

# canonicalize() is a path-normalizing helper defined elsewhere in the
# original module.


class HDFS(object):
    def __init__(self, host, port, user):
        self._hdfs = PyWebHdfsClient(
            host=host, port=port, user_name=user, timeout=None)
        logging.debug('webhdfs = %s@%s:%s', user, host, port)

    def recursive_copy(self, local_path, remote_path, exclude=None):
        if exclude is None:
            exclude = []
        c_path = canonicalize(remote_path)
        logging.debug('making %s', c_path)
        self._hdfs.make_dir(c_path)
        fs_g = os.walk(local_path)
        for dpath, dnames, fnames in fs_g:
            _, relative_path = dpath.split(local_path)
            for dname in dnames:
                if dname not in exclude:
                    c_path = canonicalize(
                        '%s/%s/%s' % (remote_path, relative_path, dname))
                    logging.debug('making %s', c_path)
                    self._hdfs.make_dir(c_path)
            for fname in fnames:
                if fname not in exclude:
                    data = file(
                        canonicalize(
                            '%s/%s/%s' % (local_path, relative_path, fname)),
                        'rb')
                    c_path = canonicalize(
                        '%s/%s/%s' % (remote_path, relative_path, fname))
                    logging.debug('creating %s', c_path)
                    self._hdfs.create_file(c_path, data, overwrite=True)
                    data.close()

    def make_dir(self, path):
        logging.debug('make_dir: %s', path)
        self._hdfs.make_dir(canonicalize(path))

    def create_file(self, data, remote_file_path):
        logging.debug('create_file: %s', remote_file_path)
        sio = StringIO.StringIO(data)
        self._hdfs.create_file(
            canonicalize(remote_file_path), sio, overwrite=True)

    def append_file(self, data, remote_file_path):
        logging.debug('append to: %s', remote_file_path)
        self._hdfs.append_file(canonicalize(remote_file_path), data)

    def stream_file_to_disk(self, remote_file_path, local_file_path):
        # pull the remote file down in 10 MB chunks; a short read marks EOF
        chunk_size = 10 * 1024 * 1024
        offset = 0
        with open(local_file_path, 'wb') as dest_file:
            data = self._hdfs.read_file(
                canonicalize(remote_file_path), offset=offset,
                length=chunk_size)
            while True:
                dest_file.write(data)
                if len(data) < chunk_size:
                    break
                offset += chunk_size
                data = self._hdfs.read_file(
                    canonicalize(remote_file_path), offset=offset,
                    length=chunk_size)

    def read_file(self, remote_file_path):
        data = self._hdfs.read_file(canonicalize(remote_file_path))
        return data

    def remove(self, path, recursive=False):
        logging.debug('remove: %s', path)
        self._hdfs.delete_file_dir(canonicalize(path), recursive)
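For orientation, here is a minimal, hypothetical usage sketch of the HDFS wrapper above; the connection details and paths are made-up placeholders, not values from the original code.
# Hypothetical usage of the HDFS wrapper (placeholder host, user, and paths).
fs = HDFS(host='namenode.example.com', port='50070', user='hdfs')
fs.make_dir('/tmp/demo')
fs.create_file('hello world\n', '/tmp/demo/hello.txt')
print fs.read_file('/tmp/demo/hello.txt')
fs.stream_file_to_disk('/tmp/demo/hello.txt', 'hello_local.txt')
fs.remove('/tmp/demo', recursive=True)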
Example 8: PyWebHdfsClient
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
from pywebhdfs.webhdfs import PyWebHdfsClient

hdfs = PyWebHdfsClient(host='localhost', port='50070', user_name='vagrant')
my_file = 'user/vagrant/hdfs-test/data.dat'

print 'Status of file: ', my_file
status = hdfs.get_file_dir_status(my_file)
print status

# read 500 bytes of the file, starting at byte offset 500
print 'Second 500 bytes of file: ', my_file
data = hdfs.read_file(my_file, offset=500, length=500)
print data
Example 9: PyWebHdfsClient
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
# -*- coding: UTF8 -*-
from pywebhdfs.webhdfs import PyWebHdfsClient

my_file = '/di/logs/raw_logs/rings.event/day=20160429/2016042900rings.event.1461859205239'
hdfs = PyWebHdfsClient(host='10.160.241.61', port='14000', user_name='hdfs')
hdfs.read_file(my_file)
Example 10: range
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import read_file [as alias]
import pickle
import re
import socket
import time
import urllib2

import happybase
from pywebhdfs.webhdfs import PyWebHdfsClient

THRIFTNODE = 'data2'
client = PyWebHdfsClient(host='namenode', port='50070', user_name='root')
conn = happybase.Connection(THRIFTNODE)
crawls = conn.table('crawls')
MAXLOCALLINKCOUNT = 30
timeout = 5
socket.setdefaulttimeout(timeout)
DATESTRING = str(time.strftime('%Y%m%d'))
ANET = 187

for BNET in range(5, 10):
    SCANSITESFILE = str(ANET) + '-' + str(BNET) + '-p80.log'
    FNAME = 'user/root/scans/' + str(ANET) + '/' + SCANSITESFILE
    # copy the scan list for this address block out of HDFS to a local file
    SSFP = open(SCANSITESFILE, 'w')
    SSFP.write(client.read_file(FNAME))
    SSFP.close()
    try:
        ifp = open(SCANSITESFILE, 'r')
    except:
        continue
    for line in ifp:
        line = line.strip().split()
        url = 'http://' + str(line[1]) + '/'
        req = urllib2.Request(url)
        html = ''
        try:
            html = urllib2.urlopen(req)
        except:
            print ' url open exception on ' + str(url)
            continue