This article collects typical usage examples of the Java class org.apache.hadoop.mapred.lib.HashPartitioner. If you are wondering what the HashPartitioner class is for, how to use it, or are simply looking for concrete examples, the curated samples below may help.
The HashPartitioner class belongs to the org.apache.hadoop.mapred.lib package. Eleven code examples of the class are shown below, sorted by popularity by default.
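Before the examples, a quick refresher on what the class actually does: the old-API HashPartitioner assigns each record to a reduce partition by hashing its key, i.e. (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks; the value plays no part in the result. A minimal sketch of calling it directly (the key text and partition count here are chosen purely for illustration):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.lib.HashPartitioner;

public class HashPartitionerDemo {
  public static void main(String[] args) {
    HashPartitioner<Text, IntWritable> partitioner = new HashPartitioner<Text, IntWritable>();
    Text key = new Text("http://example.org/");   // illustrative key
    IntWritable value = new IntWritable(1);       // the value is ignored by the hash
    int numReduceTasks = 4;                       // illustrative partition count
    // getPartition returns (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks
    int partition = partitioner.getPartition(key, value, numReduceTasks);
    System.out.println("key " + key + " -> partition " + partition);
  }
}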
Example 1: configure

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);
  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();
    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get(JobContext.MAP_INPUT_FILE));
      String inputFile = inputURI.getPath();
      // part file is of the form part-r-xxxxx
      partition = Integer.valueOf(inputFile.substring(
          inputFile.lastIndexOf("part") + 7)).intValue();
      noSortReducers = job.getInt(SORT_REDUCES, -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
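Example 1 only records the partitioner, the partition number parsed from the part file name, and the reducer count of the sort job. In the surrounding SortValidator job (not shown in this excerpt) those fields are later used to verify that every key found in part file N really hashes to partition N. A hedged sketch of that consistency check, factored into a standalone helper for illustration:

import java.io.IOException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.Partitioner;

public class PartitionCheck {
  /**
   * Hypothetical helper mirroring the kind of check SortValidator performs:
   * a key read from part file number 'partition' must be assigned to that same
   * partition by the partitioner, otherwise the sort output is inconsistent.
   */
  static void checkPartition(Partitioner<WritableComparable, Writable> partitioner,
                             WritableComparable key, Writable value,
                             int partition, int noSortReducers) throws IOException {
    int keyPartition = partitioner.getPartition(key, value, noSortReducers);
    if (keyPartition != partition) {
      throw new IOException("Partitions do not match: file says " + partition
          + " but the partitioner computed " + keyPartition);
    }
  }
}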
Example 2: dumpUrl

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

/**
 * Prints the content of the Node represented by the url to system out.
 *
 * @param webGraphDb
 *          The webgraph from which to get the node.
 * @param url
 *          The url of the node.
 *
 * @throws IOException
 *           If an error occurs while getting the node.
 */
public void dumpUrl(Path webGraphDb, String url) throws IOException {
  fs = FileSystem.get(getConf());
  nodeReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
      WebGraph.NODE_DIR), getConf());
  // open the readers, get the node, print out the info, and close the readers
  Text key = new Text(url);
  Node node = new Node();
  MapFileOutputFormat.getEntry(nodeReaders,
      new HashPartitioner<Text, Node>(), key, node);
  System.out.println(url + ":");
  System.out.println(" inlink score: " + node.getInlinkScore());
  System.out.println(" outlink score: " + node.getOutlinkScore());
  System.out.println(" num inlinks: " + node.getNumInlinks());
  System.out.println(" num outlinks: " + node.getNumOutlinks());
  FSUtils.closeReaders(nodeReaders);
}
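MapFileOutputFormat.getEntry uses the HashPartitioner instance only to decide which part-file reader to consult: it computes the key's partition against readers.length and then performs a single MapFile lookup in that reader. This is also why the lookup only works when the readers array covers the same number of part files that the writing job's reducers produced. A simplified sketch of the equivalent logic (not the actual Hadoop implementation):

import java.io.IOException;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.Partitioner;

public class GetEntrySketch {
  /** Simplified equivalent of MapFileOutputFormat.getEntry(). */
  static <K extends WritableComparable, V extends Writable> Writable getEntry(
      MapFile.Reader[] readers, Partitioner<K, V> partitioner, K key, V value)
      throws IOException {
    // Hash the key into one of readers.length partitions, exactly as the
    // writing job did, then look the key up in that single map file.
    int part = partitioner.getPartition(key, value, readers.length);
    return readers[part].get(key, value);
  }
}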
Example 3: configure

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);
  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();
    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get("map.input.file"));
      String inputFile = inputURI.getPath();
      partition = Integer.valueOf(
          inputFile.substring(inputFile.lastIndexOf("part") + 5)
          ).intValue();
      noSortReducers = job.getInt("sortvalidate.sort.reduce.tasks", -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
Example 4: dumpUrl

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

/**
 * Prints the loopset for a single url. The loopset information will show any
 * outlink url that eventually forms a link cycle.
 *
 * @param webGraphDb The WebGraph to check for loops
 * @param url The url to check.
 *
 * @throws IOException If an error occurs while printing loopset information.
 */
public void dumpUrl(Path webGraphDb, String url)
    throws IOException {
  // open the readers
  fs = FileSystem.get(getConf());
  loopReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
      Loops.LOOPS_DIR), getConf());
  // get the loopset for a given url, if any
  Text key = new Text(url);
  LoopSet loop = new LoopSet();
  MapFileOutputFormat.getEntry(loopReaders,
      new HashPartitioner<Text, LoopSet>(), key, loop);
  // print out each loop url in the set
  System.out.println(url + ":");
  for (String loopUrl : loop.getLoopSet()) {
    System.out.println(" " + loopUrl);
  }
  // close the readers
  FSUtils.closeReaders(loopReaders);
}
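Examples 2 and 4 (and several below) finish with FSUtils.closeReaders, a small Nutch utility that is not part of this excerpt. Its effect is roughly the following; this is a sketch of the obvious implementation, with null checks added defensively rather than copied from the Nutch source:

import java.io.IOException;
import org.apache.hadoop.io.MapFile;

public class ReaderUtils {
  /** Close every MapFile reader in the array (sketch of an FSUtils.closeReaders-style helper). */
  static void closeReaders(MapFile.Reader[] readers) throws IOException {
    if (readers == null) {
      return;
    }
    for (MapFile.Reader reader : readers) {
      if (reader != null) {
        reader.close();
      }
    }
  }
}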
Example 5: dumpUrl

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

/**
 * Prints the content of the Node represented by the url to system out.
 *
 * @param webGraphDb The webgraph from which to get the node.
 * @param url The url of the node.
 *
 * @throws IOException If an error occurs while getting the node.
 */
public void dumpUrl(Path webGraphDb, String url)
    throws IOException {
  fs = FileSystem.get(getConf());
  nodeReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
      WebGraph.NODE_DIR), getConf());
  // open the readers, get the node, print out the info, and close the readers
  Text key = new Text(url);
  Node node = new Node();
  MapFileOutputFormat.getEntry(nodeReaders,
      new HashPartitioner<Text, Node>(), key, node);
  System.out.println(url + ":");
  System.out.println(" inlink score: " + node.getInlinkScore());
  System.out.println(" outlink score: " + node.getOutlinkScore());
  System.out.println(" num inlinks: " + node.getNumInlinks());
  System.out.println(" num outlinks: " + node.getNumOutlinks());
  FSUtils.closeReaders(nodeReaders);
}
Example 6: get

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

public CrawlDatum get(String crawlDb, String url, JobConf config)
    throws IOException {
  Text key = new Text(url);
  CrawlDatum val = new CrawlDatum();
  openReaders(crawlDb, config);
  CrawlDatum res = (CrawlDatum) MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, CrawlDatum>(), key, val);
  return res;
}
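openReaders is not part of this excerpt; in Nutch's crawl db reader it essentially opens one MapFile reader per part file of the crawl db before getEntry is called. A sketch of that idea, assuming the map files live in the crawl db's "current" subdirectory (the directory name and the lazy-initialization detail are assumptions for illustration):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.mapred.MapFileOutputFormat;

public class CrawlDbReaderSketch {
  private MapFile.Reader[] readers;

  /** Lazily open MapFile readers over the crawl db's part files (sketch). */
  void openReaders(String crawlDb, Configuration config) throws IOException {
    if (readers != null) {
      return;
    }
    FileSystem fs = FileSystem.get(config);
    readers = MapFileOutputFormat.getReaders(fs, new Path(crawlDb, "current"), config);
  }
}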
Example 7: main

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

public static void main(String[] args) throws Exception {
  if (args == null || args.length < 2) {
    System.out.println("LinkDumper$Reader usage: <webgraphdb> <url>");
    return;
  }
  // open the readers for the linkdump directory
  Configuration conf = NutchConfiguration.create();
  FileSystem fs = FileSystem.get(conf);
  Path webGraphDb = new Path(args[0]);
  String url = args[1];
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(
      webGraphDb, DUMP_DIR), conf);
  // get the link nodes for the url
  Text key = new Text(url);
  LinkNodes nodes = new LinkNodes();
  MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, LinkNodes>(), key, nodes);
  // print out the link nodes
  LinkNode[] linkNodesAr = nodes.getLinks();
  System.out.println(url + ":");
  for (LinkNode node : linkNodesAr) {
    System.out.println(" " + node.getUrl() + " - "
        + node.getNode().toString());
  }
  // close the readers
  FSUtils.closeReaders(readers);
}
Example 8: get

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

public CrawlDatum get(String crawlDb, String url, Configuration config) throws IOException {
  Text key = new Text(url);
  CrawlDatum val = new CrawlDatum();
  openReaders(crawlDb, config);
  CrawlDatum res = (CrawlDatum) MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, CrawlDatum>(), key, val);
  return res;
}
Example 9: main

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

public static void main(String[] args)
    throws Exception {
  if (args == null || args.length < 2) {
    System.out.println("LinkDumper$Reader usage: <webgraphdb> <url>");
    return;
  }
  // open the readers for the linkdump directory
  Configuration conf = NutchConfiguration.create();
  FileSystem fs = FileSystem.get(conf);
  Path webGraphDb = new Path(args[0]);
  String url = args[1];
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(
      webGraphDb, DUMP_DIR), conf);
  // get the link nodes for the url
  Text key = new Text(url);
  LinkNodes nodes = new LinkNodes();
  MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, LinkNodes>(), key, nodes);
  // print out the link nodes
  LinkNode[] linkNodesAr = nodes.getLinks();
  System.out.println(url + ":");
  for (LinkNode node : linkNodesAr) {
    System.out.println(" " + node.getUrl() + " - "
        + node.getNode().toString());
  }
  // close the readers
  FSUtils.closeReaders(readers);
}
Example 10: getJavaPartitioner

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

/**
 * Get the user's original partitioner.
 * @param conf the configuration to look in
 * @return the class that the user submitted
 */
static Class<? extends Partitioner> getJavaPartitioner(JobConf conf) {
  return conf.getClass(Submitter.PARTITIONER,
                       HashPartitioner.class,
                       Partitioner.class);
}
Example 11: getJavaPartitioner

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

/**
 * Get the user's original partitioner.
 * @param conf the configuration to look in
 * @return the class that the user submitted
 */
static Class<? extends Partitioner> getJavaPartitioner(JobConf conf) {
  return conf.getClass("hadoop.pipes.partitioner",
                       HashPartitioner.class,
                       Partitioner.class);
}