This article collects typical usage examples of the pydoop.hdfs.mkdir function in Python. If you have been wondering what the mkdir function does, how to call it, or what it looks like in real code, the hand-picked examples below should help.
A total of 15 code examples of the mkdir function are shown below, sorted by popularity by default.
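Before the examples, here is a minimal sketch of the basic workflow they all build on, assuming a reachable HDFS installation; the directory and file names are made up for illustration:

import pydoop.hdfs as hdfs

demo_dir = "mkdir_demo/sub"          # hypothetical relative HDFS path
if not hdfs.path.isdir(demo_dir):
    hdfs.mkdir(demo_dir)             # missing parent directories are created too (see Example 5)
hdfs.dump("hello\n", hdfs.path.join(demo_dir, "greeting.txt"))  # write a small file into it
print(hdfs.ls(demo_dir))             # list the new directory's contents
hdfs.rmr("mkdir_demo")               # recursively remove everything when done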
Example 1: _write
def _write(self, data):
    "Internal Write API"
    schema = self.schema
    wmaid = self.wmaid(data)
    year, month, _ = today()
    hdir = '%s/%s/%s' % (self.hdir, year, month)
    if not hdfs.path.isdir(hdir):
        hdfs.mkdir(hdir)
    fname = file_name(hdir, wmaid, self.compress)
    # create Avro writer and binary encoder
    writer = avro.io.DatumWriter(schema)
    bytes_writer = io.BytesIO()
    if self.compress:
        # use gzip'ed writer with BytesIO file object
        gzip_writer = gzip.GzipFile(fileobj=bytes_writer, mode='wb')
        encoder = avro.io.BinaryEncoder(gzip_writer)
    else:
        # plain binary encoder
        encoder = avro.io.BinaryEncoder(bytes_writer)
    # write records from given data stream to binary writer
    writer.write(data, encoder)
    # close gzip stream if necessary
    if self.compress:
        gzip_writer.flush()
        gzip_writer.close()
    # store raw data to hadoop via HDFS
    hdfs.dump(bytes_writer.getvalue(), fname)
    # close bytes stream
    bytes_writer.close()
Example 2: __setup_remote_paths
def __setup_remote_paths(self):
    """
    Actually create the working directory and copy the module into it.

    Note: the script has to be readable by Hadoop; though this may not
    generally be a problem on HDFS, where the Hadoop user is usually
    the superuser, things may be different if our working directory is
    on a shared POSIX filesystem. Therefore, we make the directory
    and the script accessible by all.
    """
    self.logger.debug("remote_wd: %s", self.remote_wd)
    self.logger.debug("remote_exe: %s", self.remote_exe)
    self.logger.debug("remotes: %s", self.files_to_upload)
    if self.args.module:
        self.logger.debug(
            'Generated pipes_code:\n\n %s', self._generate_pipes_code()
        )
    if not self.args.pretend:
        hdfs.mkdir(self.remote_wd)
        hdfs.chmod(self.remote_wd, "a+rx")
        self.logger.debug("created and chmod-ed: %s", self.remote_wd)
        pipes_code = self._generate_pipes_code()
        hdfs.dump(pipes_code, self.remote_exe)
        self.logger.debug("dumped pipes_code to: %s", self.remote_exe)
        hdfs.chmod(self.remote_exe, "a+rx")
        self.__warn_user_if_wd_maybe_unreadable(self.remote_wd)
        for (l, h, _) in self.files_to_upload:
            self.logger.debug("uploading: %s to %s", l, h)
            hdfs.cp(l, h)
    self.logger.debug("Created%sremote paths:" %
                      (' [simulation] ' if self.args.pretend else ' '))
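The note in the docstring above (make the working directory accessible to everyone, since on a shared filesystem it may not be owned by the Hadoop user) reduces to a mkdir-then-chmod pair. A minimal sketch, with a made-up directory name:

import pydoop.hdfs as hdfs

shared_wd = "pydoop_shared_wd"   # hypothetical working directory name
hdfs.mkdir(shared_wd)            # create the remote working directory
hdfs.chmod(shared_wd, "a+rx")    # let any user list and traverse it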
Example 3: mk_hdfs_temp_dir
def mk_hdfs_temp_dir(prefix):
    found = True
    while found:
        tmp = os.path.basename(tempfile.mktemp(prefix=prefix))
        found = phdfs.path.exists(tmp)
    phdfs.mkdir(tmp)
    return tmp
Example 4: run_alignments
def run_alignments(bcl_output_dir, output_dir):
    sample_directories = _get_samples_from_bcl_output(bcl_output_dir)
    logger.info("Found %d samples in bcl output directory", len(sample_directories))
    logger.debug("Making base output directory %s", output_dir)
    phdfs.mkdir(output_dir)
    # launch all the jobs
    base_cmd = [
        get_exec('seal'), 'seqal', '--align-only',
        '-D', 'seal.seqal.nthreads={:d}'.format(GlobalConf['seqal_nthreads']),
        '-D', 'mapreduce.map.cpu.vcores={:d}'.format(GlobalConf['seqal_yarn_cores']),
        '--input-format', GlobalConf.get('seqal_input_fmt', 'prq'),
        '--output-format', GlobalConf.get('seqal_output_fmt', 'sam'),
        '--ref-archive', GlobalConf['reference_archive'],
    ]

    def start_job(sample_dir):
        sample_output_dir = phdfs.path.join(output_dir, os.path.basename(sample_dir))
        cmd = base_cmd + [sample_dir, sample_output_dir]
        # LP: should refactor to start the job within the AlignJob object
        job = AlignJob(cmd=cmd, inputp=sample_dir, outputp=sample_output_dir)
        logger.info("Launching alignment of sample %s", os.path.basename(sample_dir))
        logger.debug("executing command: %s", cmd)
        job.popen_obj = subprocess.Popen(map(str, cmd), bufsize=4096)
        job.popen_obj.poll()
        logger.debug("job running with PID %d", job.popen_obj.pid)
        return job

    jobs = [start_job(s) for s in sample_directories]
    ok = _wait(jobs, GlobalConf['remove_output'])
    if not ok:
        errored_jobs = [j for j in jobs if j.failed]
        logger.error("%d alignment jobs failed", len(errored_jobs))
        logger.error("Here are the return codes: %s",
                     ', '.join(str(j.retcode) for j in errored_jobs))
        raise RuntimeError("Some alignment jobs failed")
Example 5: mkdir
def mkdir(self):
    for wd in self.local_wd, self.hdfs_wd:
        d1 = "%s/d1" % wd
        d2 = "%s/d2" % d1
        hdfs.mkdir(d2)
        dir_list = hdfs.ls(d1)
        self.assertEqual(len(dir_list), 1)
        self.assertTrue(dir_list[0].endswith(d2))
Example 6: __init__
def __init__(self, prefix=None, logger=None):
    self.wd = self.exe = self.input = self.output = None
    self.logger = logger or utils.NullLogger()
    if prefix:
        self.wd = utils.make_random_str(prefix=prefix)
        hdfs.mkdir(self.wd)
        for n in "input", "output":
            setattr(self, n, hdfs.path.join(self.wd, n))
Example 7: __cp_dir
def __cp_dir(self, wd):
    src_dir = "%s/src_dir" % wd
    hdfs.mkdir(src_dir)
    copy_on_wd = "%s/src_dir_copy" % wd
    copy_on_copy_on_wd = "%s/src_dir" % copy_on_wd
    hdfs.cp(src_dir, copy_on_wd)
    self.assertTrue(hdfs.path.exists(copy_on_wd))
    hdfs.cp(src_dir, copy_on_wd)
    self.assertTrue(hdfs.path.exists(copy_on_copy_on_wd))
    self.assertRaises(IOError, hdfs.cp, src_dir, copy_on_wd)
Example 8: __make_tree
def __make_tree(self, wd):
    d1 = "%s/d1" % wd
    t1 = FSTree(d1)
    d2 = "%s/d2" % d1
    t2 = t1.add(d2)
    hdfs.mkdir(d2)
    for t, d, bn in ((t1, d1, "f1"), (t2, d2, "f2")):
        f = "%s/%s" % (d, bn)
        hdfs.dump(self.data, f)
        t.add(f, 0)
    return t1
Example 9: __init__
def __init__(self, prefix=None, logger=None):
    hadoop_version_info = pydoop.hadoop_version_info()
    if hadoop_version_info.is_local():
        raise pydoop.LocalModeNotSupported()
    self.wd = self.exe = self.input = self.output = None
    self.logger = logger or utils.NullLogger()
    if prefix:
        self.wd = utils.make_random_str(prefix=prefix)
        hdfs.mkdir(self.wd)
        for n in "input", "output":
            setattr(self, n, hdfs.path.join(self.wd, n))
Example 10: mapper
def mapper(_, record, writer, conf):
    out_dir = conf.get('out.dir', utils.make_random_str())
    if not hdfs.path.isdir(out_dir):
        hdfs.mkdir(out_dir)
        hdfs.chmod(out_dir, 'g+rwx')
    img_path = record.strip()
    a = get_array(img_path)
    out_a = calc_features(a)
    out_path = hdfs.path.join(out_dir, '%s.out' % hdfs.path.basename(img_path))
    with hdfs.open(out_path, 'w') as fo:
        np.save(fo, out_a)  # actual output
    hdfs.chmod(out_path, 'g+rw')
    writer.emit(img_path, fo.name)  # info (tab-separated input-output)
Example 11: __cp_file
def __cp_file(self, wd):
    fn = "%s/fn" % wd
    hdfs.dump(self.data, fn)
    dest_dir = "%s/dest_dir" % wd
    hdfs.mkdir(dest_dir)
    fn_copy_on_wd = "%s/fn_copy" % wd
    hdfs.cp(fn, fn_copy_on_wd)
    self.assertEqual(hdfs.load(fn_copy_on_wd), self.data)
    self.assertRaises(IOError, hdfs.cp, fn, fn_copy_on_wd)
    fn_copy_on_dest_dir = "%s/fn" % dest_dir
    hdfs.cp(fn, dest_dir)
    self.assertEqual(hdfs.load(fn_copy_on_dest_dir), self.data)
    self.assertRaises(IOError, hdfs.cp, fn, dest_dir)
Example 12: test_isdir
def test_isdir(self):
    path = utils.make_random_str()
    self.assertFalse(hdfs.path.isdir(path))
    try:
        hdfs.dump("foo\n", path)
        self.assertFalse(hdfs.path.isdir(path))
        hdfs.rmr(path)
        hdfs.mkdir(path)
        self.assertTrue(hdfs.path.isdir(path))
    finally:
        try:
            hdfs.rmr(path)
        except IOError:
            pass
Example 13: test_kind
def test_kind(self):
    for path in self.path, self.u_path:
        self.assertTrue(hdfs.path.kind(path) is None)
        try:
            hdfs.dump("foo\n", path)
            self.assertEqual('file', hdfs.path.kind(path))
            hdfs.rmr(path)
            hdfs.mkdir(path)
            self.assertEqual('directory', hdfs.path.kind(path))
        finally:
            try:
                hdfs.rmr(path)
            except IOError:
                pass
Example 14: test_isdir
def test_isdir(self):
    for path in self.path, self.u_path:
        self.assertFalse(hdfs.path.isdir(path))
        try:
            hdfs.dump("foo\n", path)
            self.assertFalse(hdfs.path.isdir(path))
            hdfs.rmr(path)
            hdfs.mkdir(path)
            self.assertTrue(hdfs.path.isdir(path))
        finally:
            try:
                hdfs.rmr(path)
            except IOError:
                pass
Example 15: test_kind
def test_kind(self):
    path = utils.make_random_str()
    self.assertTrue(hdfs.path.kind(path) is None)
    try:
        hdfs.dump("foo\n", path)
        self.assertEqual('file', hdfs.path.kind(path))
        hdfs.rmr(path)
        hdfs.mkdir(path)
        self.assertEqual('directory', hdfs.path.kind(path))
    finally:
        try:
            hdfs.rmr(path)
        except IOError:
            pass