This article collects typical usage examples of the Python method pywebhdfs.webhdfs.PyWebHdfsClient.delete_file_dir. If you have been asking yourself how exactly PyWebHdfsClient.delete_file_dir is used, or what working examples of it look like, the curated code samples below should help. You can also explore further usage examples of the containing class, pywebhdfs.webhdfs.PyWebHdfsClient.
Eight code examples of PyWebHdfsClient.delete_file_dir are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
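Before turning to the examples, here is a minimal sketch of the call itself (the host, port, and paths below are placeholder values, not taken from any of the examples). delete_file_dir issues a WebHDFS DELETE request for the given path and returns True on success; pass recursive=True to remove a non-empty directory. Note that pywebhdfs paths are written without a leading slash.

from pywebhdfs.webhdfs import PyWebHdfsClient

# placeholder connection details
hdfs = PyWebHdfsClient(host='namenode.example.com', port='50070',
                       user_name='hdfs')
# delete a single file
hdfs.delete_file_dir('user/hdfs/some_file.txt')
# delete a directory and everything beneath it
hdfs.delete_file_dir('user/hdfs/some_dir', recursive=True)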
Example 1: WhenTestingDeleteOperation
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or alternatively: from pywebhdfs.webhdfs.PyWebHdfsClient import delete_file_dir [as alias]
class WhenTestingDeleteOperation(unittest.TestCase):

    def setUp(self):
        self.host = 'hostname'
        self.port = '00000'
        self.user_name = 'username'
        self.webhdfs = PyWebHdfsClient(host=self.host, port=self.port,
                                       user_name=self.user_name)
        self.response = MagicMock()
        self.requests = MagicMock(return_value=self.response)
        self.path = 'user/hdfs/old_dir'

    def test_delete_throws_exception_for_not_ok(self):
        self.response.status_code = httplib.BAD_REQUEST
        self.requests.delete.return_value = self.response
        with patch('pywebhdfs.webhdfs.requests', self.requests):
            with self.assertRaises(errors.PyWebHdfsException):
                self.webhdfs.delete_file_dir(self.path)

    def test_delete_returns_true(self):
        self.response.status_code = httplib.OK
        self.requests.delete.return_value = self.response
        with patch('pywebhdfs.webhdfs.requests', self.requests):
            result = self.webhdfs.delete_file_dir(self.path)
            self.assertTrue(result)
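Note the pattern used here: the tests patch pywebhdfs.webhdfs.requests with a MagicMock, so no live WebHDFS endpoint is needed. The stubbed response's status_code alone determines whether delete_file_dir raises errors.PyWebHdfsException (on httplib.BAD_REQUEST) or returns True (on httplib.OK).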
Example 2: close
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or alternatively: from pywebhdfs.webhdfs.PyWebHdfsClient import delete_file_dir [as alias]
def close(self):
    # drop the temp database
    self._cursor.execute('USE %s' % self._temp_db)
    self._cursor.execute('SHOW TABLES')
    temp_tables = [x[0] for x in self._cursor.fetchall()]
    for table in temp_tables:
        self._cursor.execute('DROP TABLE IF EXISTS %s.%s' % (self._temp_db, table))
    self._cursor.execute('SHOW FUNCTIONS')
    temp_udfs = [x[1] for x in self._cursor.fetchall()]
    for udf in temp_udfs:
        self._cursor.execute('DROP FUNCTION IF EXISTS %s.%s' % (self._temp_db, udf))
    self._cursor.execute('SHOW AGGREGATE FUNCTIONS')
    temp_udas = [x[1] for x in self._cursor.fetchall()]
    for uda in temp_udas:
        self._cursor.execute('DROP AGGREGATE FUNCTION IF EXISTS %s.%s' % (self._temp_db, uda))
    self._cursor.execute('USE default')
    self._cursor.execute('DROP DATABASE IF EXISTS %s' % self._temp_db)
    # drop the temp dir in HDFS
    try:
        from requests.exceptions import ConnectionError
        from pywebhdfs.webhdfs import PyWebHdfsClient
        hdfs_client = PyWebHdfsClient(host=self._nn_host,
                                      port=self._webhdfs_port, user_name=self._hdfs_user)
        hdfs_client.delete_file_dir(self._temp_dir.lstrip('/'), recursive=True)
    except ImportError:
        import sys
        sys.stderr.write("Could not import requests or pywebhdfs. "
                         "You must delete the temporary directory manually: %s" % self._temp_dir)
    except ConnectionError:
        import sys
        sys.stderr.write("Could not connect via pywebhdfs. "
                         "You must delete the temporary directory manually: %s" % self._temp_dir)
Example 3: __init__
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or alternatively: from pywebhdfs.webhdfs.PyWebHdfsClient import delete_file_dir [as alias]
#......... some of the code is omitted here .........
        if self.debug:
            print "Update failed."
        return False
    if self.debug:
        print "[Updated]", file, "version:", version
    return True

def delete(self, file, version=None):
    ''' Delete a file in HBase and HDFS. You can specify the
    file's version in order to delete that particular version.
    :param file: the file's name
    :param version: the file's version
    :return: True on success, otherwise False.
    '''
    if not version:
        version = self.get_lastest_version(file)
    key = ''.join(['v', str(version), '.', file])
    path = ''.join([self.hdfs_path, key])
    # Check if the file exists
    if self.hbase_table.fetch(key) is None:
        if self.debug:
            print "Cannot delete.", key, "does not exist."
        return False
    # Remove the row in HBase
    t = self.hbase_table
    if t.remove(key) != 200:
        if self.debug:
            print "[HBASE] cannot remove a row key:", key
        return False
    # Delete the file in HDFS
    if not self.hdfs.delete_file_dir(path):
        if self.debug:
            print "[HDFS] Cannot remove a file path:", path
        return False
    if self.debug:
        print "[Deleted]", file, "version:", version
    return True

def get_file_meta_data(self, file, version=None):
    ''' Get all of a file's meta data from HBase. You can
    specify a file's version.
    :param file: the file's name
    :param version: the file's version
    :return: meta data as a dict on success, False if the file does not exist
    '''
    if not version:
        version = self.get_lastest_version(file)
    key = ''.join(['v', str(version), '.', file])
    if not self.hbase_table.fetch(key):
        if self.debug:
            print key, "does not exist"
        return False
    return self.hbase_table.fetch(key)['meta_data']

def get_file_content(self, file, version=None):
    ''' Get all of a file's content from HBase. You can
    specify a file's version.
    :param file: the file's name
    :param version: the file's version
    :return: the file's content on success, False if the file does not exist
    '''
    if not version:
        version = self.get_lastest_version(file)
Example 4: print
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or alternatively: from pywebhdfs.webhdfs.PyWebHdfsClient import delete_file_dir [as alias]
# checksum reflects file changes
file_checksum = hdfs.get_file_checksum(example_file)
print(file_checksum)
# read in the data for the file
print('reading data from file at: {0}\n'.format(example_file))
file_data = hdfs.read_file(example_file)
print(file_data)
# rename the example_dir
print('renaming directory from {0} to {1}\n'.format(example_dir, rename_dir))
hdfs.rename_file_dir(example_dir, '/{0}'.format(rename_dir))
# list the contents of the new directory
listdir_stats = hdfs.list_dir(rename_dir)
print(listdir_stats)
example_file = '{dir}/example.txt'.format(dir=rename_dir)
# delete the example file
print('deleting example file at: {0}'.format(example_file))
hdfs.delete_file_dir(example_file)
# list the contents of the directory
listdir_stats = hdfs.list_dir(rename_dir)
print(listdir_stats)
# delete the example directory
print('deleting the example directory at: {0}'.format(rename_dir))
hdfs.delete_file_dir(rename_dir, recursive='true')
Example 5: PyWebHdfsClient
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or alternatively: from pywebhdfs.webhdfs.PyWebHdfsClient import delete_file_dir [as alias]
#1 imports
from pywebhdfs.webhdfs import PyWebHdfsClient
#2 make connection with hadoop file system
hdfs = PyWebHdfsClient(user_name="hdfs", port=50070, host="sandbox.hortonworks.com")
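#3 delete the csv file if it already exists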
hdfs.delete_file_dir('chapter5/LoanStats3d.csv', recursive=True)
#4 recreate the chapters directory
hdfs.make_dir('chapter5')
#5 upload the csv file
with open('./data/stored_csv.csv') as file_data:
    hdfs.create_file('chapter5/LoanStats3d.csv', file_data, overwrite=True)
#6 print the status to see if this succeeded.
print hdfs.get_file_dir_status('chapter5/LoanStats3d.csv')
Example 6: HDFS
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or alternatively: from pywebhdfs.webhdfs.PyWebHdfsClient import delete_file_dir [as alias]
class HDFS(object):
    def __init__(self, host, port, user):
        self._hdfs = PyWebHdfsClient(
            host=host, port=port, user_name=user, timeout=None)
        logging.debug('webhdfs = %s@%s:%s', user, host, port)

    def recursive_copy(self, local_path, remote_path, exclude=None):
        if exclude is None:
            exclude = []
        c_path = canonicalize(remote_path)
        logging.debug('making %s', c_path)
        self._hdfs.make_dir(c_path)
        fs_g = os.walk(local_path)
        for dpath, dnames, fnames in fs_g:
            _, relative_path = dpath.split(local_path)
            for dname in dnames:
                if dname not in exclude:
                    c_path = canonicalize(
                        '%s/%s/%s' %
                        (remote_path, relative_path, dname))
                    logging.debug('making %s', c_path)
                    self._hdfs.make_dir(c_path)
            for fname in fnames:
                if fname not in exclude:
                    data = file(
                        canonicalize(
                            '%s/%s/%s' %
                            (local_path, relative_path, fname)), 'rb')
                    c_path = canonicalize(
                        '%s/%s/%s' %
                        (remote_path, relative_path, fname))
                    logging.debug('creating %s', c_path)
                    self._hdfs.create_file(c_path, data, overwrite=True)
                    data.close()

    def make_dir(self, path):
        logging.debug('make_dir: %s', path)
        self._hdfs.make_dir(canonicalize(path))

    def create_file(self, data, remote_file_path):
        logging.debug('create_file: %s', remote_file_path)
        sio = StringIO.StringIO(data)
        self._hdfs.create_file(
            canonicalize(remote_file_path),
            sio,
            overwrite=True)

    def append_file(self, data, remote_file_path):
        logging.debug('append to: %s', remote_file_path)
        self._hdfs.append_file(canonicalize(remote_file_path), data)

    def stream_file_to_disk(self, remote_file_path, local_file_path):
        chunk_size = 10 * 1024 * 1024
        offset = 0
        with open(local_file_path, 'wb') as dest_file:
            data = self._hdfs.read_file(canonicalize(remote_file_path), offset=offset, length=chunk_size)
            while True:
                dest_file.write(data)
                if len(data) < chunk_size:
                    break
                offset += chunk_size
                data = self._hdfs.read_file(canonicalize(remote_file_path), offset=offset, length=chunk_size)

    def read_file(self, remote_file_path):
        data = self._hdfs.read_file(canonicalize(remote_file_path))
        return data

    def remove(self, path, recursive=False):
        logging.debug('remove: %s', path)
        self._hdfs.delete_file_dir(canonicalize(path), recursive)
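A minimal usage sketch for this wrapper (the host, port, user, and paths are hypothetical placeholders; canonicalize is assumed to be the module's own path-normalizing helper):

# connect and exercise the wrapper end to end
fs = HDFS('namenode.example.com', '50070', 'hdfs')
fs.make_dir('/tmp/example')
fs.create_file('hello\n', '/tmp/example/hello.txt')  # note: data comes first
print fs.read_file('/tmp/example/hello.txt')
fs.remove('/tmp/example', recursive=True)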
Example 7: PyWebHdfsClient
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or alternatively: from pywebhdfs.webhdfs.PyWebHdfsClient import delete_file_dir [as alias]
def formatLine(line):
    line = line.values()
    line[0], line[5] = line[5], line[0]
    return ', '.join(line)

if __name__ == '__main__':
    host = 'hdfs://localhost:9000'
    ticker_path = host + '/user/hadoop/tickers.txt'
    save_path = host + '/user/hadoop/stock'
    hdfs = PyWebHdfsClient(host='localhost', port='50070', user_name='hadoop')
    folder = hdfs.list_dir('user/hadoop/stock')['FileStatuses']['FileStatus']
    files = sorted([dt.datetime.strptime(f['pathSuffix'].split('.')[0], '%Y-%m-%d').date() for f in folder])
    end = dt.date.today().strftime('%Y-%m-%d')
    sc = SparkContext(appName='stock_data')
    if len(files) > 3:
        hdfs.delete_file_dir(join(save_path, files[0].strftime('%Y-%m-%d') + '.csv'), recursive=True)
    if len(files) == 0:
        start = '2014-01-01'
        stockData = sc.textFile(ticker_path).flatMap(lambda x: Share(x).get_historical(start, end)).map(formatLine)
        stockData.saveAsTextFile(join(save_path, end + '.csv'))
    else:
        start = (files[-1] + dt.timedelta(days=1)).strftime('%Y-%m-%d')
        histStockData = sc.textFile(join(save_path, files[-1].strftime('%Y-%m-%d') + '.csv'))
        stockData = sc.textFile(ticker_path).flatMap(lambda x: Share(x).get_historical(start, end)).map(formatLine)
        histStockData.union(stockData).saveAsTextFile(join(save_path, end + '.csv'))
Example 8: BeautifulSoup
# Required import: from pywebhdfs.webhdfs import PyWebHdfsClient [as alias]
# Or alternatively: from pywebhdfs.webhdfs.PyWebHdfsClient import delete_file_dir [as alias]
HADOOP_HTMLFILE = 'user/root/crawls/' + str(ANET) + '/' + str(BNET) + '/' + HTMLFILE
HADOOP_TEXTFILE = 'user/root/texts/' + str(ANET) + '/' + str(BNET) + '/' + TEXTFILE
print "-======= site: " + str(url) + " =======-"
try:
    soup = BeautifulSoup(html)
except:
    print " soup exception"
    continue
HFP = open(HTMLFILE, 'w')
HFP.write(soup.encode('utf-8'))
HFP.close()
with open(HTMLFILE) as hfp:
    try:
        client.create_file(HADOOP_HTMLFILE, hfp)
    except:
        client.delete_file_dir(HADOOP_HTMLFILE)
        client.create_file(HADOOP_HTMLFILE, hfp)
TFP = open(TEXTFILE, 'w')
WRITEOUT = unicode(soup.get_text())
WORDLIST = re.sub(r'[^a-zA-Z0-9 ]', r' ', WRITEOUT)
WORDLIST = WORDLIST.strip().split()
TFP.write(WRITEOUT.encode('utf-8'))
TFP.close()
PAGETITLE = ''
try:
    PAGETITLE = soup.title.string
except:
    pass
try:
    PAGETITLE = PAGETITLE.encode('utf-8')