

Python PyWebHdfsClient.get_file_dir_status method: code examples

This article collects typical usage examples of the Python method pywebhdfs.webhdfs.PyWebHdfsClient.get_file_dir_status. If you are unsure what this method does or how to call it, the selected examples below should help. You can also explore further usage examples of the containing class, pywebhdfs.webhdfs.PyWebHdfsClient.


Seven code examples of the PyWebHdfsClient.get_file_dir_status method are shown below, sorted by popularity by default.
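
Before the examples, here is a minimal sketch of the call itself. The connection details and path below are placeholders chosen for illustration and do not come from any of the projects listed; the FileStatus keys follow the WebHDFS response shown in Example 2, and pywebhdfs.errors.FileNotFound is the exception caught in Example 3.

from pywebhdfs.webhdfs import PyWebHdfsClient
from pywebhdfs import errors

# placeholder connection details -- adjust host, port and user_name for your cluster
hdfs = PyWebHdfsClient(host='localhost', port='50070', user_name='hdfs')

try:
    # returns a dict with a single 'FileStatus' entry describing the path
    status = hdfs.get_file_dir_status('user/hdfs/example_dir')
    print(status['FileStatus']['type'])    # 'FILE' or 'DIRECTORY'
    print(status['FileStatus']['length'])  # size in bytes (0 for a directory)
except errors.FileNotFound:
    # raised when the path does not exist
    print('path not found')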

Example 1: create_data_from_station_data

# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import get_file_dir_status [as alias]
def create_data_from_station_data(first, second):
    """this function creates the data analyzing the two stations in comparison"""
    global hdfs; #global hdfs object
    global hbase; #global hbase object
    
    if(hdfs is None): 
        from pywebhdfs.webhdfs import PyWebHdfsClient; 
        hdfs = PyWebHdfsClient(host='cshadoop.boisestate.edu',port='50070', user_name='uacharya'); 
   
    if(hbase is None):
        import happybase;
        hbase = happybase.ConnectionPool(size=1,host='cshadoop.boisestate.edu');
 
    date_for_comparision = first["Date"].strip();

   # creating directory for each date
    try:
        hdfs.get_file_dir_status('user/uacharya/simulation/'+date_for_comparision);
    except Exception:
        # directory to hold the dataset as a csv file for each node in the wall display, numbered 1 to 9
        for index in range(1, 10):
            content = 'Date,ID,Source,Destination,S_Lat,S_Lon,D_Lat,D_Lon,Wind_Lat,Wind_Lon,Wind_Velocity\n';
            try:
                hdfs.create_file('user/uacharya/simulation/'+date_for_comparision+'/node'+str(index)+'/output.csv',content,replication=1);
            except Exception:
                continue;
   
    
    dataset = {'node_1':[],'node_2':[],'node_3':[],'node_4':[],'node_5':[],'node_6':[],'node_7':[],'node_8':[],'node_9':[]};
   
    for data in broadcast_variable.value:
        compare_data_between(date_for_comparision, first, data,dataset);

#    for key in dataset:
#        if(len(dataset[key])!=0):
#            content = "\n".join(dataset[key]);
#            content +="\n";
#            while(True):
#                try:
#                    hdfs.append_file('user/uacharya/simulation/'+date+'/'+key+'/output.csv',content,buffersize=4096);
#                    break;
#                except Exception:
#                    time.sleep(0.2);
#                    continue;

    
    dataset.clear(); #clearing the dictionary
    # append over here after all the global variable has been made        
    return second;
Developer: uacharya, Project: WebServer, Lines: 51, Source: TestingSpark.py

Example 2: WhenTestingGetFileStatusOperation

# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import get_file_dir_status [as alias]
class WhenTestingGetFileStatusOperation(unittest.TestCase):

    def setUp(self):

        self.host = 'hostname'
        self.port = '00000'
        self.user_name = 'username'
        self.webhdfs = PyWebHdfsClient(host=self.host, port=self.port,
                                       user_name=self.user_name)
        self.response = MagicMock()
        self.requests = MagicMock(return_value=self.response)
        self.path = 'user/hdfs/old_dir'
        self.response = MagicMock()
        self.file_status = {
            "FileStatus": {
                "accessTime": 0,
                "blockSize": 0,
                "group": "supergroup",
                "length": 0,
                "modificationTime": 1320173277227,
                "owner": "webuser",
                "pathSuffix": "",
                "permission": "777",
                "replication": 0,
                "type": "DIRECTORY"
            }
        }
        self.response.json = MagicMock(return_value=self.file_status)

    def test_get_status_throws_exception_for_not_ok(self):

        self.response.status_code = httplib.BAD_REQUEST
        self.requests.get.return_value = self.response
        with patch('pywebhdfs.webhdfs.requests', self.requests):
            with self.assertRaises(errors.PyWebHdfsException):
                self.webhdfs.get_file_dir_status(self.path)

    def test_get_status_returns_true(self):

        self.response.status_code = httplib.OK
        self.requests.get.return_value = self.response
        with patch('pywebhdfs.webhdfs.requests', self.requests):
            result = self.webhdfs.get_file_dir_status(self.path)

        for key in result:
            self.assertEqual(result[key], self.file_status[key])
Developer: waliaashish85, Project: pywebhdfs, Lines: 48, Source: test_webhdfs.py

Example 3: load

# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import get_file_dir_status [as alias]
    def load(self, job, task, fifo):
        self.job = job
        self.task = task
        self.fifo = fifo
        self.key = None
        self.script_proc = None
        self.decompress_obj = None
        self.pycurl_callback_exception = None

        if task.data['scheme'] == 's3':
            self.is_anonymous = job.spec.source.aws_access_key is None or job.spec.source.aws_secret_key is None
            if self.is_anonymous:
                s3_conn = S3Connection(anon=True)
            else:
                s3_conn = S3Connection(job.spec.source.aws_access_key, job.spec.source.aws_secret_key)
            bucket = s3_conn.get_bucket(task.data['bucket'])

            try:
                self.key = bucket.get_key(task.data['key_name'])
            except S3ResponseError as e:
                raise WorkerException("Received %s %s accessing `%s`, aborting" % (e.status, e.reason, task.data['key_name']))
        elif task.data['scheme'] == 'hdfs':
            fname = task.data['key_name']
            client = PyWebHdfsClient(
                job.spec.source.hdfs_host,
                job.spec.source.webhdfs_port,
                user_name=job.spec.source.hdfs_user)
            try:
                filesize = client.get_file_dir_status(fname)['FileStatus']['length']
            except pywebhdfs.errors.FileNotFound:
                raise WorkerException("File '%s' does not exist on HDFS" % fname)
            self.key = AttrDict({'name': fname, 'size': filesize})
        elif task.data['scheme'] == 'file':
            globber = glob2.Globber()
            fname = globber._normalize_string(task.data['key_name'])

            if not os.path.exists(fname):
                raise WorkerException("File '%s' does not exist on this filesystem" % fname)
            elif not os.path.isfile(fname):
                raise WorkerException("File '%s' exists, but is not a file" % fname)

            self.key = AttrDict({'name': fname, 'size': os.path.getsize(fname)})
        else:
            raise WorkerException('Unsupported job with paths: %s' % [ str(p) for p in self.job.paths ])

        if self.key is None:
            raise WorkerException('Failed to find key associated with task ID %s' % task.task_id)

        self.metrics = DownloadMetrics(self.key.size)
Developer: Vlad777, Project: memsql-loader, Lines: 51, Source: downloader.py

Example 4: __init__

# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import get_file_dir_status [as alias]
class DMS:
    def __init__(self, debug=0):
        ''' This function use to init a class. To show an error messages debug
        should be 1.
        :param : debug - 1, show an error or success message. 0 otherwise
        :return: Nothing.
        '''
        self.debug = debug
        pass

    def hbase_connection(self, host, port, table='dms'):
        ''' This function use to establish a connection to hbase, for preparing to
        insert, remove, fetch data from hbase. We use starbase for connect to hbase
        via rest api.(See more: https://github.com/barseghyanartur/starbase)
        :param : host - hbase rest host
        :param : port - hbase rest running port
        :param : table - DMS table on hbase (default: 'dms')
        :return: Nothing.
        '''
        self.hbase = hbaseConnection(host=host, port=port)
        t = self.hbase.table(table)
        if (not t.exists()):
            t.create('meta_data','file')
        self.hbase_table = t

    def hdfs_connection(self, host, port, user_name, hdfs_path='/tmp/'):
        ''' This function use to establish a connection to hdfs, for preparing to
        create, retrieve, update, delete file in hdfs. We use pywebhdfs in order to
        do this task via hdfs rest api.(See more: http://pythonhosted.org/pywebhdfs/)
        :param : host - hdfs rest host
        :param : port - hdfs rest running port
        :param : user_name - hdfs username (for authentication)
        :param : hdfs_path - location to store files. (default: '/tmp/')
        :return: Nothing.
        '''
        self.hdfs = PyWebHdfsClient(host=host, port=port, user_name=user_name)
        self.hdfs_path = hdfs_path

    def solr_connection(self, host, port, collection):
        ''' This function use to establish a connection to solr, for query or
        search any text on a system.
        :param : host - solr's host
        :param : port - solr's running port
        :param : collection - solr's collection for searching
        '''
        self.solr = ''.join(['http://',host,':',port,'/solr/',collection])

    def extract(self, file):
        ''' This function use to extract meta data from a file. We use hachoir3 library
        to extract them. (See more: http://hachoir3.readthedocs.org)
        :param : file - file for extract
        :return: meta data as dict for success, 0 if fail.
        '''
        try:
            filename, realname = unicodeFilename(file), file
            parser = createParser(filename, realname)
            meta_data = extractMetadata(parser)
            meta_data_text = meta_data.exportPlaintext()
            meta_list = dict()
            for i in range(1,len(meta_data_text)):
                meta_split = meta_data_text[i].split(":")
                column = meta_split[0].replace('- ','')
                value = meta_split[1].lstrip()
                meta_list.update({column:value})
            return meta_list
        except:
            if self.debug:
                print "Something went wrong, meta data of",file,"could not extract."
            return None


    def upload(self, file):
        ''' This function use to upload a file to hdfs and store meta data on hbase
        Meta data consist of 2 main parts: file's meta data and hdfs's file's meta data.
        This function will increase a file version if it is already store in hbase.
        :param : file - file's name
        :return: True if success otherwise False.
        '''
        version = 1
        key = ''.join(['v',str(version),'.',file])
        path = ''.join([self.hdfs_path,key])

        # Read a file
        try:
            f = open(file,'r')
            file_content = f.read()
            f.close()
        except:
            print "Cannot read file:",file

        # Check file's version
        while self.hbase_table.fetch(key) != None:
            version = int(self.get_lastest_version(file)) + 1
            key = ''.join(['v',str(version),'.',file])
            path = ''.join([self.hdfs_path,key])

        # Try to upload file.
        try:
            self.hdfs.create_file(path,file_content)
            hdfs_meta = self.hdfs.get_file_dir_status(path)['FileStatus']
#.........the rest of the code is omitted here.........
Developer: lukkiddd, Project: DMSHadoop, Lines: 103, Source: DMS.py

Example 5: PyWebHdfsClient

# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import get_file_dir_status [as alias]
example_dir = 'user/hdfs/example_dir'
example_file = '{dir}/example.txt'.format(dir=example_dir)
example_data = '01010101010101010101010101010101010101010101\n'
rename_dir = 'user/hdfs/example_rename'


# create a new client instance
hdfs = PyWebHdfsClient(host='localhost', port='50070',
                       user_name='hduser')

# create a new directory for the example
print('making new HDFS directory at: {0}\n'.format(example_dir))
hdfs.make_dir(example_dir)

# get a dictionary of the directory's status
dir_status = hdfs.get_file_dir_status(example_dir)
print(dir_status)

# create a new file on hdfs
print('making new file at: {0}\n'.format(example_file))
hdfs.create_file(example_file, example_data)

file_status = hdfs.get_file_dir_status(example_file)
print(file_status)

# get the checksum for the file
file_checksum = hdfs.get_file_checksum(example_file)
print(file_checksum)

# append to the file created in previous step
print('appending to file at: {0}\n'.format(example_file))
Developer: avaranovich, Project: pywebhdfs, Lines: 33, Source: example.py

Example 6: PyWebHdfsClient

# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import get_file_dir_status [as alias]
#1 imports

from pywebhdfs.webhdfs import PyWebHdfsClient

#2 make connection with hadoop file system

hdfs = PyWebHdfsClient(user_name="hdfs",port=50070,host="sandbox.hortonworks.com")


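#3 remove the existing file or directory at that path, if any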
hdfs.delete_file_dir('chapter5/LoanStats3d.csv',recursive=True)

#4 recreate the chapters directory

hdfs.make_dir('chapter5')

#5 upload the csv file

with open('./data/stored_csv.csv') as file_data:
	hdfs.create_file('chapter5/LoanStats3d.csv',file_data, overwrite=True)

#6 print the status to see if this succeeded.
print hdfs.get_file_dir_status('chapter5/LoanStats3d.csv')
Developer: DavyCielen, Project: python_book_chapter5, Lines: 24, Source: store_hadoop.py

Example 7: PyWebHdfsClient

# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or: from pywebhdfs.webhdfs.PyWebHdfsClient import get_file_dir_status [as alias]
from pywebhdfs.webhdfs import PyWebHdfsClient

hdfs = PyWebHdfsClient(host='localhost',port='50070',user_name='vagrant')
my_file = 'user/vagrant/hdfs-test/data.dat'

print 'Status of file: ', my_file
status = hdfs.get_file_dir_status(my_file)
print status

print 'Second 500 bytes of file: ',my_file
data = hdfs.read_file(my_file,offset=500,length=500)

print data
Developer: aashish24, Project: hadoop-for-hpcers-tutorial, Lines: 15, Source: readfile.py


Note: The pywebhdfs.webhdfs.PyWebHdfsClient.get_file_dir_status examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their authors; copyright remains with the original authors, and redistribution or use should follow each project's license. Do not reproduce without permission.