This article collects typical usage examples of Python's posixpath.pjoin function. If you have been wondering how pjoin is used in practice, how to call it, or what real code that relies on it looks like, the curated examples below should help.
The following shows 15 code examples of the pjoin function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
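For context, pjoin is not a separate function: in these snippets it is posixpath.join imported under a shorter name. Unlike os.path.join, it always joins with forward slashes regardless of the host OS, which is why it shows up below for HDFS paths and tar archive member names. A minimal sketch of the import convention the examples assume:

from posixpath import join as pjoin

print(pjoin('/user/hive', 'warehouse', 'db.db'))  # /user/hive/warehouse/db.db
print(pjoin('dist', 'pkg-1.0', 'setup.py'))       # dist/pkg-1.0/setup.py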
Example 1: test_load_data_partition

def test_load_data_partition(con, hdfs, tmp_dir, unpart_t, df, temp_table):
    part_keys = ['year', 'month']

    con.create_table(temp_table, schema=unpart_t.schema(),
                     partition=part_keys)
    part_t = con.table(temp_table)

    # trim the runtime of this test
    df = df[df.month == '1'].reset_index(drop=True)

    unique_keys = df[part_keys].drop_duplicates()

    hdfs_dir = pjoin(tmp_dir, 'load-data-partition')

    df2 = df.drop(['year', 'month'], axis='columns')

    csv_props = {'serialization.format': ',', 'field.delim': ','}

    for i, (year, month) in enumerate(unique_keys.itertuples(index=False)):
        chunk = df2[(df.year == year) & (df.month == month)]
        chunk_path = pjoin(hdfs_dir, '{}.csv'.format(i))

        con.write_dataframe(chunk, chunk_path)

        # test both styles of insert
        if i:
            part = {'year': year, 'month': month}
        else:
            part = [year, month]

        part_t.add_partition(part)
        part_t.alter_partition(part, format='text',
                               serde_properties=csv_props)
        part_t.load_data(chunk_path, partition=part)

    hdfs.rmdir(hdfs_dir)
    verify_partitioned_table(part_t, df, unique_keys)
Example 2: put

def put(self, hdfs_path, resource, overwrite=False, verbose=None,
        **kwargs):
    verbose = verbose or options.verbose
    is_path = isinstance(resource, six.string_types)

    if is_path and osp.isdir(resource):
        for dirpath, dirnames, filenames in os.walk(resource):
            rel_dir = osp.relpath(dirpath, resource)
            if rel_dir == '.':
                rel_dir = ''
            for fpath in filenames:
                abs_path = osp.join(dirpath, fpath)
                rel_hdfs_path = pjoin(hdfs_path, rel_dir, fpath)
                self.put(rel_hdfs_path, abs_path, overwrite=overwrite,
                         verbose=verbose, **kwargs)
    else:
        if is_path:
            basename = os.path.basename(resource)
            if self.exists(hdfs_path):
                if self.status(hdfs_path)['type'] == 'DIRECTORY':
                    hdfs_path = pjoin(hdfs_path, basename)
            if verbose:
                self.log('Writing local {0} to HDFS {1}'.format(resource,
                                                                hdfs_path))
            self.client.upload(hdfs_path, resource,
                               overwrite=overwrite, **kwargs)
        else:
            if verbose:
                self.log('Writing buffer to HDFS {0}'.format(hdfs_path))
            resource.seek(0)
            self.client.write(hdfs_path, resource, overwrite=overwrite,
                              **kwargs)
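The branch above that handles a local directory is worth isolating: os.walk plus osp.relpath computes each file's path relative to the root, and pjoin rebuilds the same layout under the HDFS destination. A minimal standalone sketch of that mapping (collect_uploads is a hypothetical name, not part of the ibis API):

import os
import os.path as osp
from posixpath import join as pjoin

def collect_uploads(local_root, hdfs_root):
    # Yield (local file, remote path) pairs that mirror the local tree.
    for dirpath, _dirnames, filenames in os.walk(local_root):
        rel_dir = osp.relpath(dirpath, local_root)
        if rel_dir == '.':
            rel_dir = ''
        for fname in filenames:
            yield osp.join(dirpath, fname), pjoin(hdfs_root, rel_dir, fname)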
Example 3: pandas

def pandas(self, df, name=None, database=None, persist=False):
    """
    Create a (possibly temp) parquet table from a local pandas DataFrame.
    """
    name, database = self._get_concrete_table_path(name, database,
                                                   persist=persist)
    qualified_name = self._fully_qualified_name(name, database)

    # write df to a temp CSV file on HDFS
    temp_csv_hdfs_dir = pjoin(options.impala.temp_hdfs_path, util.guid())
    buf = BytesIO()
    # '\\N' is Hive's default NULL sentinel; the backslash must be escaped
    # (a bare '\N' is a syntax error in Python 3)
    df.to_csv(buf, header=False, index=False, na_rep='\\N')
    self.hdfs.put(pjoin(temp_csv_hdfs_dir, '0.csv'), buf)

    # define a temporary table using delimited data
    schema = util.pandas_to_ibis_schema(df)
    table = self.delimited_file(
        temp_csv_hdfs_dir, schema,
        name='ibis_tmp_pandas_{0}'.format(util.guid()), database=database,
        external=True, persist=False)

    # CTAS into Parquet
    self.create_table(name, expr=table, database=database,
                      format='parquet', overwrite=False)

    # cleanup
    self.hdfs.delete(temp_csv_hdfs_dir, recursive=True)
    return self._wrap_new_table(qualified_name, persist)
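The staging pattern here, building a unique scratch directory under a configured temp root, writing one CSV into it, then deleting the whole directory, leans on pjoin twice. A rough stdlib-only sketch of the path construction, assuming uuid4 hex as a stand-in for util.guid() and a made-up staging root:

import uuid
from posixpath import join as pjoin

temp_hdfs_path = '/tmp/ibis'  # assumed staging root; the real code reads it from options
temp_csv_hdfs_dir = pjoin(temp_hdfs_path, uuid.uuid4().hex)
csv_path = pjoin(temp_csv_hdfs_dir, '0.csv')  # e.g. /tmp/ibis/<32 hex chars>/0.csv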
Example 4: test_ls

def test_ls(self):
    test_dir = pjoin(self.tmp_dir, 'ls-test')
    self.hdfs.mkdir(test_dir)
    for i in range(10):  # xrange in the Python 2 original
        local_path = self._make_random_file()
        hdfs_path = pjoin(test_dir, local_path)
        self.hdfs.put(hdfs_path, local_path)

    assert len(self.hdfs.ls(test_dir)) == 10
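One posixpath.join behavior matters for calls like pjoin(test_dir, local_path): if the second argument is absolute, everything before it is discarded, so this test presumably relies on _make_random_file() returning a bare filename rather than an absolute path. For example:

from posixpath import join as pjoin

pjoin('/tmp/ls-test', 'data.csv')   # '/tmp/ls-test/data.csv'
pjoin('/tmp/ls-test', '/data.csv')  # '/data.csv' -- the absolute part wins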
Example 5: test_mv_to_directory

def test_mv_to_directory(self):
    remote_file = self._make_random_hdfs_file()

    dest_dir = pjoin(self.tmp_dir, util.guid())
    self.hdfs.mkdir(dest_dir)

    self.hdfs.mv(remote_file, dest_dir)
    new_remote_file = pjoin(dest_dir, os.path.basename(remote_file))

    file_status = self.hdfs.status(new_remote_file)
    assert file_status['type'] == 'FILE'
Example 6: __init__

def __init__(self, pic_dir, **kw):
    base = Filename(pic_dir).base
    name = pjoin(base, u"")  # empty final component yields a trailing slash
    pic_dir_abs = sos.path.abspath(pic_dir)
    self.im_dir = pjoin(pic_dir_abs, "im")
    self.th_dir = pjoin(pic_dir_abs, "th")
    self._entries = []
    self.__super.__init__(name=name, pic_dir=pic_dir, **kw)
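The pjoin(base, u"") call is a small idiom: joining with an empty final component appends a trailing slash, turning a bare name into a directory-style name. For example:

from posixpath import join as pjoin

pjoin('gallery', '')       # 'gallery/'
pjoin('/pics/2015', 'im')  # '/pics/2015/im'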
Example 7: test_create_database_with_location

def test_create_database_with_location(self):
    base = pjoin(self.tmp_dir, util.guid())
    name = '__ibis_test_{0}'.format(util.guid())
    tmp_path = pjoin(base, name)

    self.con.create_database(name, path=tmp_path)
    assert self.hdfs.exists(base)
    self.con.drop_database(name)
    self.hdfs.rmdir(base)
Example 8: test_create_table_with_location

def test_create_table_with_location(self):
    base = pjoin(self.tmp_dir, util.guid())
    name = "test_{0}".format(util.guid())
    tmp_path = pjoin(base, name)

    expr = self.alltypes
    table_name = _random_table_name()

    self.con.create_table(table_name, expr=expr, path=tmp_path,
                          database=self.test_data_db)
    self.temp_tables.append(".".join([self.test_data_db, table_name]))
    assert self.hdfs.exists(tmp_path)
Example 9: test_create_database_with_location

def test_create_database_with_location(con, tmp_dir, hdfs):
    base = pjoin(tmp_dir, util.guid())
    name = '__ibis_test_{}'.format(util.guid())
    tmp_path = pjoin(base, name)

    con.create_database(name, path=tmp_path)
    try:
        assert hdfs.exists(base)
    finally:
        try:
            con.drop_database(name)
        finally:
            hdfs.rmdir(base)
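The nested try/finally guarantees that hdfs.rmdir(base) runs even when con.drop_database(name) raises, and that an assertion failure still propagates. One equivalent formulation, sketched with contextlib.ExitStack (callbacks run in reverse registration order; con, hdfs, name, and base are the fixtures and locals from the test above):

from contextlib import ExitStack

with ExitStack() as stack:
    stack.callback(hdfs.rmdir, base)         # runs last
    stack.callback(con.drop_database, name)  # runs first
    assert hdfs.exists(base)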
Example 10: _create_777_tmp_dir

def _create_777_tmp_dir(cls):
    base = pjoin(cls.tmp_dir, util.guid())
    tmp_path = pjoin(base, util.guid())
    env = IbisTestEnv()
    superuser_hdfs = ibis.hdfs_connect(host=env.nn_host,
                                       port=env.webhdfs_port,
                                       auth_mechanism=env.auth_mechanism,
                                       verify=(env.auth_mechanism
                                               not in ['GSSAPI', 'LDAP']),
                                       user=env.hdfs_superuser)
    superuser_hdfs.mkdir(base)
    superuser_hdfs.chmod(base, '777')
    return tmp_path
Example 11: build

def build(self, target_dir=None):  # type: (Path) -> Path
    self._io.writeln(" - Building <info>sdist</info>")
    if target_dir is None:
        target_dir = self._path / "dist"

    if not target_dir.exists():
        target_dir.mkdir(parents=True)

    target = target_dir / "{}-{}.tar.gz".format(
        self._package.pretty_name, self._meta.version
    )
    gz = GzipFile(target.as_posix(), mode="wb")
    tar = tarfile.TarFile(
        target.as_posix(), mode="w", fileobj=gz, format=tarfile.PAX_FORMAT
    )

    try:
        tar_dir = "{}-{}".format(self._package.pretty_name, self._meta.version)

        files_to_add = self.find_files_to_add(exclude_build=False)

        for relpath in files_to_add:
            path = self._path / relpath
            tar_info = tar.gettarinfo(
                str(path), arcname=pjoin(tar_dir, str(relpath))
            )
            tar_info = self.clean_tarinfo(tar_info)

            if tar_info.isreg():
                with path.open("rb") as f:
                    tar.addfile(tar_info, f)
            else:
                tar.addfile(tar_info)  # symlinks and other non-regular files

        setup = self.build_setup()
        tar_info = tarfile.TarInfo(pjoin(tar_dir, "setup.py"))
        tar_info.size = len(setup)
        tar.addfile(tar_info, BytesIO(setup))

        pkg_info = self.build_pkg_info()
        tar_info = tarfile.TarInfo(pjoin(tar_dir, "PKG-INFO"))
        tar_info.size = len(pkg_info)
        tar.addfile(tar_info, BytesIO(pkg_info))
    finally:
        tar.close()
        gz.close()

    self._io.writeln(" - Built <fg=cyan>{}</>".format(target.name))

    return target
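Here pjoin is doing archive work rather than filesystem work: tar member names must use forward slashes on every platform, so building arcname values with posixpath.join keeps the sdist layout correct even on Windows. A minimal self-contained sketch of that use (the file name and payload are invented):

import tarfile
from io import BytesIO
from posixpath import join as pjoin

payload = b"print('hello')\n"
info = tarfile.TarInfo(pjoin('pkg-1.0', 'setup.py'))  # member name: pkg-1.0/setup.py
info.size = len(payload)

with tarfile.open('sdist-demo.tar.gz', 'w:gz') as tar:
    tar.addfile(info, BytesIO(payload))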
Example 12: write_temp_csv

def write_temp_csv(self):
    temp_hdfs_dir = pjoin(options.impala.temp_hdfs_path,
                          'pandas_{0}'.format(util.guid()))
    self.hdfs.mkdir(temp_hdfs_dir)

    # Keep track of the temporary HDFS file
    self.temp_hdfs_dirs.append(temp_hdfs_dir)

    # Write the file to HDFS
    hdfs_path = pjoin(temp_hdfs_dir, '0.csv')
    self.write_csv(hdfs_path)

    return temp_hdfs_dir
Example 13: test_create_table_with_location_execute

def test_create_table_with_location_execute(
    con, hdfs, tmp_dir, alltypes, test_data_db, temp_table
):
    base = pjoin(tmp_dir, util.guid())
    name = 'test_{}'.format(util.guid())
    tmp_path = pjoin(base, name)

    expr = alltypes
    table_name = temp_table

    con.create_table(
        table_name, obj=expr, location=tmp_path, database=test_data_db
    )
    assert hdfs.exists(tmp_path)
Example 14: test_size

def test_size(self):
    test_dir = pjoin(self.tmp_dir, 'size-test')

    K = 2048
    path = self._make_random_file(size=K)
    hdfs_path = pjoin(test_dir, path)
    self.hdfs.put(hdfs_path, path)
    assert self.hdfs.size(hdfs_path) == K

    size_test_dir = self._sample_nested_directory()

    hdfs_path = pjoin(test_dir, size_test_dir)
    self.hdfs.put(hdfs_path, size_test_dir)

    assert self.hdfs.size(hdfs_path) == K * 7
Example 15: test_get_file_overwrite

def test_get_file_overwrite(self):
    local_path = self._make_random_file()
    local_path2 = self._make_random_file()

    remote_path = pjoin(self.tmp_dir, local_path)
    self.hdfs.put(remote_path, local_path)

    remote_path2 = pjoin(self.tmp_dir, local_path2)
    self.hdfs.put(remote_path2, local_path2)

    with self.assertRaises(IOError):
        self.hdfs.get(remote_path, '.')

    self.hdfs.get(remote_path, local_path2, overwrite=True)
    assert open(local_path2).read() == open(local_path).read()