This article collects typical usage examples of the Python function mrjob.util.tar_and_gzip. If you are wondering how to use tar_and_gzip in Python, how it works, or what real calls to it look like, the curated code samples below may help.
Ten code examples of the tar_and_gzip function are shown below, sorted by popularity by default.
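Before the examples, here is a minimal, self-contained sketch of a typical tar_and_gzip call, based only on the call pattern visible in the examples below (positional dir and out_path, plus the optional filter and prefix keyword arguments). The source directory, file names, and filter rule are illustrative assumptions, not part of the original examples, and tar_and_gzip is only available in the older mrjob releases these examples target.

import os
import tarfile
import tempfile

from mrjob.util import tar_and_gzip

# Illustrative paths only; any writable directory works.
src_dir = tempfile.mkdtemp()
with open(os.path.join(src_dir, 'hello.txt'), 'w') as f:
    f.write('hello\n')

out_path = os.path.join(tempfile.mkdtemp(), 'src.tar.gz')

# Archive src_dir into out_path; the filter drops backup files (returning
# False excludes a path), and prefix stores members under 'src/' inside
# the tarball.
tar_and_gzip(src_dir, out_path,
             filter=lambda path: not path.endswith('~'),
             prefix='src')

# The result is an ordinary gzipped tarball, readable with the tarfile module.
with tarfile.open(out_path, 'r:gz') as t:
    print(t.getnames())  # e.g. ['src/hello.txt']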
Example 1: setUp
def setUp(self):
    super(SetupTestCase, self).setUp()

    os.mkdir(os.path.join(self.tmp_dir, 'foo'))

    self.foo_py = os.path.join(self.tmp_dir, 'foo', 'foo.py')

    # if our job can import foo, getsize will return 2x as many bytes
    with open(self.foo_py, 'w') as foo_py:
        foo_py.write('import os.path\n'
                     'from os.path import getsize as _real_getsize\n'
                     'os.path.getsize = lambda p: _real_getsize(p) * 2')

    self.foo_sh = os.path.join(self.tmp_dir, 'foo', 'foo.sh')

    with open(self.foo_sh, 'w') as foo_sh:
        foo_sh.write('#!/bin/sh\n'
                     'touch foo.sh-made-this\n')
    os.chmod(self.foo_sh, stat.S_IRWXU)

    self.foo_tar_gz = os.path.join(self.tmp_dir, 'foo.tar.gz')
    tar_and_gzip(os.path.join(self.tmp_dir, 'foo'), self.foo_tar_gz)

    self.foo_py_size = os.path.getsize(self.foo_py)
    self.foo_sh_size = os.path.getsize(self.foo_sh)
    self.foo_tar_gz_size = os.path.getsize(self.foo_tar_gz)
Example 2: setUp
def setUp(self):
    super(SetupTestCase, self).setUp()

    os.mkdir(os.path.join(self.tmp_dir, "foo"))

    self.foo_py = os.path.join(self.tmp_dir, "foo", "foo.py")

    # if our job can import foo, getsize will return 2x as many bytes
    with open(self.foo_py, "w") as foo_py:
        foo_py.write(
            "import os.path\n"
            "from os.path import getsize as _real_getsize\n"
            "os.path.getsize = lambda p: _real_getsize(p) * 2"
        )

    self.foo_sh = os.path.join(self.tmp_dir, "foo", "foo.sh")

    with open(self.foo_sh, "w") as foo_sh:
        foo_sh.write("#!/bin/sh\n" "touch foo.sh-made-this\n")
    os.chmod(self.foo_sh, stat.S_IRWXU)

    self.foo_tar_gz = os.path.join(self.tmp_dir, "foo.tar.gz")
    tar_and_gzip(os.path.join(self.tmp_dir, "foo"), self.foo_tar_gz)

    self.foo_py_size = os.path.getsize(self.foo_py)
    self.foo_sh_size = os.path.getsize(self.foo_sh)
    self.foo_tar_gz_size = os.path.getsize(self.foo_tar_gz)
Example 3: _create_mrjob_tar_gz
def _create_mrjob_tar_gz(self):
    """Make a tarball of the mrjob library, without .pyc or .pyo files,
    and return its path. This will also set self._mrjob_tar_gz_path.

    It's safe to call this method multiple times (we'll only create
    the tarball once.)
    """
    if self._mrjob_tar_gz_path is None:
        # find mrjob library
        import mrjob

        if not os.path.basename(mrjob.__file__).startswith('__init__.'):
            raise Exception(
                "Bad path for mrjob library: %s; can't bootstrap mrjob",
                mrjob.__file__)

        mrjob_dir = os.path.dirname(mrjob.__file__) or '.'

        tar_gz_path = os.path.join(
            self._get_local_tmp_dir(), 'mrjob.tar.gz')

        def filter_path(path):
            filename = os.path.basename(path)
            return not(file_ext(filename).lower() in ('.pyc', '.pyo') or
                       # filter out emacs backup files
                       filename.endswith('~') or
                       # filter out emacs lock files
                       filename.startswith('.#') or
                       # filter out MacFuse resource forks
                       filename.startswith('._'))

        tar_and_gzip(mrjob_dir, tar_gz_path, filter=filter_path)
        self._mrjob_tar_gz_path = tar_gz_path

    return self._mrjob_tar_gz_path
Example 4: test_extract_dir_for_tar
def test_extract_dir_for_tar(self):
    join = os.path.join

    tar_and_gzip(dir=join(self.tmp_dir, 'a'),
                 out_path=join(self.tmp_dir, 'not_a.tar.gz'),
                 prefix='b')

    assert_equal(extract_dir_for_tar(join(self.tmp_dir, 'not_a.tar.gz')),
                 'b')
Example 5: all_pairs_BIC_using_mapreduce
def all_pairs_BIC_using_mapreduce(self, iteration_bic_list, em_iters, X, gmm_list):
    """
    Computes the BIC score for all pairs by using MapReduce and returns
    the pair with the best score
    """
    print "Map-Reduce execution"

    # iter_gmm_list = map(lambda(gidx, didx): gmm_list[gidx], iteration_bic_list)
    # pickle.dump(iter_gmm_list, open('iter_gmm_list', 'w'))
    # os.chmod("iter_gmm_list", S_IRUSR | S_IWUSR | S_IXUSR | \
    #                           S_IRGRP | S_IXGRP | \
    #                           S_IROTH | S_IXOTH)
    from subprocess import call
    call(["mkdir", "-p", "gmm"])
    for i in range(0, len(iteration_bic_list)):
        gidx, didx = iteration_bic_list[i]
        pickle.dump(gmm_list[gidx], open('gmm/' + str(i), 'w'))

    os.chmod("iter_gmm_list", S_IRUSR | S_IWUSR | S_IXUSR | \
                              S_IRGRP | S_IXGRP | \
                              S_IROTH | S_IXOTH)

    import mrjob.util as util
    util.tar_and_gzip('gmm', 'gmm.tgz')

    input = []
    l = len(iteration_bic_list)
    for gmm1idx in range(l):
        for gmm2idx in range(gmm1idx + 1, l):
            gidx1, didx1 = iteration_bic_list[gmm1idx]
            gidx2, didx2 = iteration_bic_list[gmm2idx]
            an_item = protocol().write((gmm1idx, gmm2idx), (didx1, didx2, em_iters))
            input.append(an_item + "\n")

    mr_args = ['-v', '-r', 'hadoop', '--input-protocol', 'pickle',
               '--output-protocol', 'pickle', '--protocol', 'pickle']
    job = AllPairsBicScoreMRJob(args=mr_args).sandbox(stdin=input)
    runner = job.make_runner()
    runner.run()
    kv_pairs = map(job.parse_output_line, runner.stream_output())
    assert len(kv_pairs) == 1
    merged_tuple_indices, best_score = kv_pairs[0][1]

    # Re-merge the GMM pair with the highest score *here*, otherwise the next
    # segment_majority_vote will crash (issue with data ownership). If we don't
    # find a different workaround, we can simplify more the mapper and the reducer.
    # Essentially, we can avoid moving from mappers to the reducer the GMM pairs and
    # merged GMMs. Instead, we can move just indices and scores.
    # However, this re-merging is serialized...
    ind1, ind2 = merged_tuple_indices
    gidx1, idx1 = iteration_bic_list[ind1]
    gidx2, idx2 = iteration_bic_list[ind2]
    d1 = tools.get_data_from_indices(X, idx1)
    d2 = tools.get_data_from_indices(X, idx2)
    data = np.concatenate((d1, d2))
    g1 = gmm_list[gidx1]
    g2 = gmm_list[gidx2]
    new_gmm, score = compute_distance_BIC(g1, g2, data, em_iters)

    return new_gmm, (g1, g2), merged_tuple_indices, best_score
Example 6: test_tar_and_gzip
def test_tar_and_gzip(self):
    join = os.path.join

    # tar it up, and put it in subdirectory (b/)
    tar_and_gzip(dir=join(self.tmp_dir, 'a'),
                 out_path=join(self.tmp_dir, 'a.tar.gz'),
                 filter=lambda path: not path.endswith('z'),
                 prefix='b')

    # untar it into b/
    t = tarfile.open(join(self.tmp_dir, 'a.tar.gz'), 'r:gz')
    t.extractall(self.tmp_dir)
    t.close()

    self.ensure_expected_results(excluded_files=['baz'])
Example 7: test_master_bootstrap_script_is_valid_python
def test_master_bootstrap_script_is_valid_python(self):
    # create a fake src tarball
    with open(os.path.join(self.tmp_dir, 'foo.py'), 'w'):
        pass
    yelpy_tar_gz_path = os.path.join(self.tmp_dir, 'yelpy.tar.gz')
    tar_and_gzip(self.tmp_dir, yelpy_tar_gz_path, prefix='yelpy')

    # do everything
    runner = EMRJobRunner(conf_path=False,
                          bootstrap_cmds=['echo "Hi!"', 'true', 'ls'],
                          bootstrap_files=['/tmp/quz'],
                          bootstrap_mrjob=True,
                          bootstrap_python_packages=[yelpy_tar_gz_path],
                          bootstrap_scripts=['speedups.sh', '/tmp/s.sh'])

    script_path = os.path.join(self.tmp_dir, 'b.py')
    runner._create_master_bootstrap_script(dest=script_path)

    assert os.path.exists(script_path)
    py_compile.compile(script_path)
Example 8: _create_mrjob_tar_gz
def _create_mrjob_tar_gz(self):
    """Make a tarball of the mrjob library, without .pyc or .pyo files.

    This will also set ``self._mrjob_tar_gz_path`` and return it.

    Typically called from
    :py:meth:`_create_setup_wrapper_script`.

    It's safe to call this method multiple times (we'll only create
    the tarball once.)
    """
    if not self._mrjob_tar_gz_path:
        # find mrjob library
        import mrjob

        if not os.path.basename(mrjob.__file__).startswith('__init__.'):
            raise Exception(
                "Bad path for mrjob library: %s; can't bootstrap mrjob",
                mrjob.__file__)

        mrjob_dir = os.path.dirname(mrjob.__file__) or '.'

        tar_gz_path = os.path.join(self._get_local_tmp_dir(),
                                   'mrjob.tar.gz')

        def filter_path(path):
            filename = os.path.basename(path)
            return not(filename.lower().endswith('.pyc') or
                       filename.lower().endswith('.pyo') or
                       # filter out emacs backup files
                       filename.endswith('~') or
                       # filter out emacs lock files
                       filename.startswith('.#') or
                       # filter out MacFuse resource forks
                       filename.startswith('._'))

        log.debug('archiving %s -> %s as %s' % (
            mrjob_dir, tar_gz_path, os.path.join('mrjob', '')))
        tar_and_gzip(
            mrjob_dir, tar_gz_path, filter=filter_path, prefix='mrjob')

        self._mrjob_tar_gz_path = tar_gz_path

    return self._mrjob_tar_gz_path
Example 9: _create_mrjob_tar_gz
def _create_mrjob_tar_gz(self):
    """Make a tarball of the mrjob library, without .pyc or .pyo files,
    and return its path. This will also set self._mrjob_tar_gz_path.

    It's safe to call this method multiple times (we'll only create
    the tarball once.)
    """
    if self._mrjob_tar_gz_path is None:
        # find mrjob library
        import mrjob

        if not os.path.basename(mrjob.__file__).startswith("__init__."):
            raise Exception("Bad path for mrjob library: %s; can't bootstrap mrjob", mrjob.__file__)

        mrjob_dir = os.path.dirname(mrjob.__file__) or "."

        tar_gz_path = os.path.join(self._get_local_tmp_dir(), "mrjob.tar.gz")

        def filter_path(path):
            filename = os.path.basename(path)
            return not (
                file_ext(filename).lower() in (".pyc", ".pyo")
                # filter out emacs backup files
                or filename.endswith("~")
                # filter out emacs lock files
                or filename.startswith(".#")
                # filter out MacFuse resource forks
                or filename.startswith("._")
            )

        log.debug("archiving %s -> %s as %s" % (mrjob_dir, tar_gz_path, os.path.join("mrjob", "")))
        tar_and_gzip(mrjob_dir, tar_gz_path, filter=filter_path, prefix="mrjob")
        self._mrjob_tar_gz_path = tar_gz_path

    return self._mrjob_tar_gz_path
Example 10: test_extract_dir_for_tar
def test_extract_dir_for_tar(self):
    join = os.path.join

    tar_and_gzip(dir=join(self.tmp_dir, "a"),
                 out_path=join(self.tmp_dir, "not_a.tar.gz"),
                 prefix="b")

    self.assertEqual(extract_dir_for_tar(join(self.tmp_dir, "not_a.tar.gz")),
                     "b")