

Java PathType.GLOB Code Examples

This article collects typical usage examples of the Java enum constant org.apache.mahout.common.iterator.sequencefile.PathType.GLOB. If you have been wondering what exactly PathType.GLOB does, how to use it, or where to find it in real code, the curated examples below should help. You can also explore further usage examples of its enclosing enum, org.apache.mahout.common.iterator.sequencefile.PathType.


Four code examples of PathType.GLOB are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
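Before the examples, a quick orientation: PathType.GLOB tells Mahout's SequenceFile helpers to expand the given Path as a glob pattern (via Hadoop's FileSystem.globStatus), whereas PathType.LIST treats it as a directory to list. Below is a minimal, self-contained sketch of the same six-argument SequenceFileDirIterable constructor used throughout the examples; the path and the Text/IntWritable key/value types are hypothetical stand-ins, not taken from the original project.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;

public class GlobIterationSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Hypothetical glob covering the part files of a MapReduce job's output.
    Path pattern = new Path("/data/output/part-*");
    for (Pair<Text,IntWritable> record :
         new SequenceFileDirIterable<Text,IntWritable>(pattern,
                                                       PathType.GLOB, // expand as a glob, not a directory listing
                                                       null,          // no extra PathFilter
                                                       null,          // no ordering over matched files
                                                       true,          // reuse key/value Writable instances
                                                       conf)) {
      System.out.println(record.getFirst() + " -> " + record.getSecond());
    }
  }
}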

Example 1: process

/**
 * This method takes the clustered points output by the clustering algorithms as input and writes them into
 * their respective clusters.
 */
public void process() throws IOException {
  createPostProcessDirectory();
  for (Pair<?,WeightedVectorWritable> record : 
       new SequenceFileDirIterable<Writable,WeightedVectorWritable>(clusteredPoints,
                                                                    PathType.GLOB,
                                                                    PathFilters.partFilter(),
                                                                    null,
                                                                    false,
                                                                    conf)) {
    String clusterId = record.getFirst().toString().trim();
    putVectorInRespectiveCluster(clusterId, record.getSecond());
  }
  IOUtils.close(writersForClusters.values());
  writersForClusters.clear();
}
 
Developer: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 19, Source: ClusterOutputPostProcessor.java
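A note on the choices above (my reading of the Mahout API, not commentary from the original project): PathFilters.partFilter() narrows the glob expansion to the part-* files a MapReduce job writes, skipping bookkeeping entries such as _SUCCESS, and the false passed as the fifth constructor argument asks the iterator not to reuse key/value Writable instances between records.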

Example 2: getFileStatus

/**
 * Expands {@code path} according to {@code pathType}: for {@link PathType#GLOB} the path
 * is treated as a glob pattern and expanded via {@code FileSystem.globStatus}; otherwise
 * its contents are listed. The optional filter and ordering are applied if given.
 */
public static FileStatus[] getFileStatus(Path path,
                                         PathType pathType,
                                         PathFilter filter,
                                         Comparator<FileStatus> ordering,
                                         Configuration conf) throws IOException {
  FileStatus[] statuses;
  FileSystem fs = path.getFileSystem(conf);
  if (filter == null) {
    statuses = pathType == PathType.GLOB ? fs.globStatus(path) : listStatus(fs, path);
  } else {
    statuses = pathType == PathType.GLOB ? fs.globStatus(path, filter) : listStatus(fs, path, filter);
  }
  if (ordering != null) {
    Arrays.sort(statuses, ordering);
  }
  return statuses;
}
 
Developer: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 13, Source: HadoopUtil.java
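A hypothetical usage sketch for the helper above (the output path and the by-name comparator are invented; HadoopUtil refers to the class from this example, and PathFilters.partFilter() is the same Mahout filter used in Example 1): expand a part-file glob and sort the matches by file name.

import java.io.IOException;
import java.util.Comparator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;

public class FileStatusSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Order matched part files alphabetically by file name.
    Comparator<FileStatus> byName = new Comparator<FileStatus>() {
      @Override
      public int compare(FileStatus a, FileStatus b) {
        return a.getPath().getName().compareTo(b.getPath().getName());
      }
    };
    FileStatus[] parts = HadoopUtil.getFileStatus(new Path("/data/output/part-*"),
                                                  PathType.GLOB,
                                                  PathFilters.partFilter(),
                                                  byName,
                                                  conf);
    for (FileStatus status : parts) {
      System.out.println(status.getPath());
    }
  }
}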

Example 3: createDictionaryChunks

/**
 * Reads the feature-frequency list built at the end of the Word Count job and assigns
 * an id to each feature. This uses constant memory and runs at disk-read speed.
 */
private static List<Path> createDictionaryChunks(Path wordCountPath,
                                                 Path dictionaryPathBase,
                                                 Configuration baseConf,
                                                 int chunkSizeInMegabytes,
                                                 int[] maxTermDimension) throws IOException {
  List<Path> chunkPaths = Lists.newArrayList();
  
  Configuration conf = new Configuration(baseConf);
  
  FileSystem fs = FileSystem.get(wordCountPath.toUri(), conf);

  long chunkSizeLimit = chunkSizeInMegabytes * 1024L * 1024L;
  int chunkIndex = 0;
  Path chunkPath = new Path(dictionaryPathBase, DICTIONARY_FILE + chunkIndex);
  chunkPaths.add(chunkPath);
  
  SequenceFile.Writer dictWriter = new SequenceFile.Writer(fs, conf, chunkPath, Text.class, IntWritable.class);

  try {
    long currentChunkSize = 0;
    Path filesPattern = new Path(wordCountPath, OUTPUT_FILES_PATTERN);
    int i = 0;
    for (Pair<Writable,Writable> record
         : new SequenceFileDirIterable<Writable,Writable>(filesPattern, PathType.GLOB, null, null, true, conf)) {
      if (currentChunkSize > chunkSizeLimit) {
        Closeables.closeQuietly(dictWriter);
        chunkIndex++;

        chunkPath = new Path(dictionaryPathBase, DICTIONARY_FILE + chunkIndex);
        chunkPaths.add(chunkPath);

        dictWriter = new SequenceFile.Writer(fs, conf, chunkPath, Text.class, IntWritable.class);
        currentChunkSize = 0;
      }

      Writable key = record.getFirst();
      int fieldSize = DICTIONARY_BYTE_OVERHEAD + key.toString().length() * 2 + Integer.SIZE / 8;
      currentChunkSize += fieldSize;
      dictWriter.append(key, new IntWritable(i++));
    }
    maxTermDimension[0] = i;
  } finally {
    Closeables.closeQuietly(dictWriter);
  }
  
  return chunkPaths;
}
 
Developer: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 51, Source: DictionaryVectorizer.java
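A note on the size bookkeeping above (an inference from the code, not text from the original page): fieldSize approximates each dictionary entry's footprint as a fixed per-record overhead (DICTIONARY_BYTE_OVERHEAD) plus two bytes per character of the term, since Java strings are UTF-16, plus Integer.SIZE / 8 = 4 bytes for the integer id; a 10-character term thus contributes DICTIONARY_BYTE_OVERHEAD + 20 + 4 bytes. Once the running total passes chunkSizeLimit, the current writer is closed and a new dictionary chunk is started, bounding each chunk's size, presumably so that downstream passes can hold one chunk in memory at a time.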

Example 4: createDictionaryChunks

/**
 * Reads the document-frequency list built at the end of the DF Count job. This uses
 * constant memory and runs at disk-read speed.
 */
private static Pair<Long[], List<Path>> createDictionaryChunks(Path featureCountPath,
                                                               Path dictionaryPathBase,
                                                               Configuration baseConf,
                                                               int chunkSizeInMegabytes) throws IOException {
  List<Path> chunkPaths = Lists.newArrayList();
  Configuration conf = new Configuration(baseConf);

  FileSystem fs = FileSystem.get(featureCountPath.toUri(), conf);

  long chunkSizeLimit = chunkSizeInMegabytes * 1024L * 1024L;
  int chunkIndex = 0;
  Path chunkPath = new Path(dictionaryPathBase, FREQUENCY_FILE + chunkIndex);
  chunkPaths.add(chunkPath);
  SequenceFile.Writer freqWriter =
    new SequenceFile.Writer(fs, conf, chunkPath, IntWritable.class, LongWritable.class);

  try {
    long currentChunkSize = 0;
    long featureCount = 0;
    long vectorCount = Long.MAX_VALUE;
    Path filesPattern = new Path(featureCountPath, OUTPUT_FILES_PATTERN);
    for (Pair<IntWritable,LongWritable> record
         : new SequenceFileDirIterable<IntWritable,LongWritable>(filesPattern,
                                                                 PathType.GLOB,
                                                                 null,
                                                                 null,
                                                                 true,
                                                                 conf)) {

      if (currentChunkSize > chunkSizeLimit) {
        Closeables.closeQuietly(freqWriter);
        chunkIndex++;

        chunkPath = new Path(dictionaryPathBase, FREQUENCY_FILE + chunkIndex);
        chunkPaths.add(chunkPath);

        freqWriter = new SequenceFile.Writer(fs, conf, chunkPath, IntWritable.class, LongWritable.class);
        currentChunkSize = 0;
      }

      int fieldSize = SEQUENCEFILE_BYTE_OVERHEAD + Integer.SIZE / 8 + Long.SIZE / 8;
      currentChunkSize += fieldSize;
      IntWritable key = record.getFirst();
      LongWritable value = record.getSecond();
      if (key.get() >= 0) {
        freqWriter.append(key, value);
      } else if (key.get() == -1) {
        vectorCount = value.get();
      }
      featureCount = Math.max(key.get(), featureCount);
    }
    featureCount++;
    Long[] counts = {featureCount, vectorCount};
    return new Pair<Long[], List<Path>>(counts, chunkPaths);
  } finally {
    Closeables.closeQuietly(freqWriter);
  }
}
 
Developer: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 63, Source: TFIDFConverter.java
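One detail worth flagging (again an inference from the code, not a statement from the original page): the DF Count job appears to reserve the key -1 as a sentinel record whose value carries the total vector (document) count, while non-negative keys are feature ids paired with their document frequencies. featureCount tracks the largest feature id seen, and the final featureCount++ converts that maximum id into a count of features; both values are returned alongside the chunk paths in the resulting Pair.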

