This article collects typical usage examples of the Java class org.apache.hadoop.mapred.lib.HashPartitioner. If you are wondering what the HashPartitioner class is for, how to use it, or are simply looking for concrete examples, the curated samples below may help.
The HashPartitioner class belongs to the org.apache.hadoop.mapred.lib package. Eleven code examples of the class are shown below, sorted by popularity by default.
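Before the examples, a quick refresher on what the class actually does: the old-API HashPartitioner assigns each record to a reduce partition by hashing its key, i.e. (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks; the value plays no part in the result. A minimal sketch of calling it directly (the key text and partition count here are chosen purely for illustration):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.lib.HashPartitioner;

public class HashPartitionerDemo {
  public static void main(String[] args) {
    HashPartitioner<Text, IntWritable> partitioner = new HashPartitioner<Text, IntWritable>();
    Text key = new Text("http://example.org/");   // illustrative key
    IntWritable value = new IntWritable(1);       // the value is ignored by the hash
    int numReduceTasks = 4;                       // illustrative partition count
    // getPartition returns (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks
    int partition = partitioner.getPartition(key, value, numReduceTasks);
    System.out.println("key " + key + " -> partition " + partition);
  }
}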
Example 1: configure

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);
  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();
    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get(JobContext.MAP_INPUT_FILE));
      String inputFile = inputURI.getPath();
      // part file is of the form part-r-xxxxx
      partition = Integer.valueOf(inputFile.substring(
          inputFile.lastIndexOf("part") + 7)).intValue();
      noSortReducers = job.getInt(SORT_REDUCES, -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
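Example 1 only records the partitioner, the partition number parsed from the part file name, and the reducer count of the sort job. In the surrounding SortValidator job (not shown in this excerpt) those fields are later used to verify that every key found in part file N really hashes to partition N. A hedged sketch of that consistency check, factored into a standalone helper for illustration:

import java.io.IOException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.Partitioner;

public class PartitionCheck {
  /**
   * Hypothetical helper mirroring the kind of check SortValidator performs:
   * a key read from part file number 'partition' must be assigned to that same
   * partition by the partitioner, otherwise the sort output is inconsistent.
   */
  static void checkPartition(Partitioner<WritableComparable, Writable> partitioner,
                             WritableComparable key, Writable value,
                             int partition, int noSortReducers) throws IOException {
    int keyPartition = partitioner.getPartition(key, value, noSortReducers);
    if (keyPartition != partition) {
      throw new IOException("Partitions do not match: file says " + partition
          + " but the partitioner computed " + keyPartition);
    }
  }
}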
Example 2: dumpUrl

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

/**
 * Prints the content of the Node represented by the url to system out.
 *
 * @param webGraphDb
 *          The webgraph from which to get the node.
 * @param url
 *          The url of the node.
 *
 * @throws IOException
 *           If an error occurs while getting the node.
 */
public void dumpUrl(Path webGraphDb, String url) throws IOException {
  fs = FileSystem.get(getConf());
  nodeReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
      WebGraph.NODE_DIR), getConf());
  // open the readers, get the node, print out the info, and close the readers
  Text key = new Text(url);
  Node node = new Node();
  MapFileOutputFormat.getEntry(nodeReaders,
      new HashPartitioner<Text, Node>(), key, node);
  System.out.println(url + ":");
  System.out.println(" inlink score: " + node.getInlinkScore());
  System.out.println(" outlink score: " + node.getOutlinkScore());
  System.out.println(" num inlinks: " + node.getNumInlinks());
  System.out.println(" num outlinks: " + node.getNumOutlinks());
  FSUtils.closeReaders(nodeReaders);
}
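MapFileOutputFormat.getEntry uses the HashPartitioner instance only to decide which part-file reader to consult: it computes the key's partition against readers.length and then performs a single MapFile lookup in that reader. This is also why the lookup only works when the readers array covers the same number of part files that the writing job's reducers produced. A simplified sketch of the equivalent logic (not the actual Hadoop implementation):

import java.io.IOException;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.Partitioner;

public class GetEntrySketch {
  /** Simplified equivalent of MapFileOutputFormat.getEntry(). */
  static <K extends WritableComparable, V extends Writable> Writable getEntry(
      MapFile.Reader[] readers, Partitioner<K, V> partitioner, K key, V value)
      throws IOException {
    // Hash the key into one of readers.length partitions, exactly as the
    // writing job did, then look the key up in that single map file.
    int part = partitioner.getPartition(key, value, readers.length);
    return readers[part].get(key, value);
  }
}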
Example 3: configure

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);
  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();
    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get("map.input.file"));
      String inputFile = inputURI.getPath();
      partition = Integer.valueOf(
          inputFile.substring(inputFile.lastIndexOf("part") + 5)
          ).intValue();
      noSortReducers = job.getInt("sortvalidate.sort.reduce.tasks", -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
Example 4: dumpUrl

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

/**
 * Prints the loopset for a single url. The loopset information will show any
 * outlink url that eventually forms a link cycle.
 *
 * @param webGraphDb The WebGraph to check for loops
 * @param url The url to check.
 *
 * @throws IOException If an error occurs while printing loopset information.
 */
public void dumpUrl(Path webGraphDb, String url)
    throws IOException {
  // open the readers
  fs = FileSystem.get(getConf());
  loopReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
      Loops.LOOPS_DIR), getConf());
  // get the loopset for a given url, if any
  Text key = new Text(url);
  LoopSet loop = new LoopSet();
  MapFileOutputFormat.getEntry(loopReaders,
      new HashPartitioner<Text, LoopSet>(), key, loop);
  // print out each loop url in the set
  System.out.println(url + ":");
  for (String loopUrl : loop.getLoopSet()) {
    System.out.println(" " + loopUrl);
  }
  // close the readers
  FSUtils.closeReaders(loopReaders);
}
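Examples 2 and 4 (and several below) finish with FSUtils.closeReaders, a small Nutch utility that is not part of this excerpt. Its effect is roughly the following; this is a sketch of the obvious implementation, with null checks added defensively rather than copied from the Nutch source:

import java.io.IOException;
import org.apache.hadoop.io.MapFile;

public class ReaderUtils {
  /** Close every MapFile reader in the array (sketch of an FSUtils.closeReaders-style helper). */
  static void closeReaders(MapFile.Reader[] readers) throws IOException {
    if (readers == null) {
      return;
    }
    for (MapFile.Reader reader : readers) {
      if (reader != null) {
        reader.close();
      }
    }
  }
}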
Example 5: dumpUrl

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

/**
 * Prints the content of the Node represented by the url to system out.
 *
 * @param webGraphDb The webgraph from which to get the node.
 * @param url The url of the node.
 *
 * @throws IOException If an error occurs while getting the node.
 */
public void dumpUrl(Path webGraphDb, String url)
    throws IOException {
  fs = FileSystem.get(getConf());
  nodeReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
      WebGraph.NODE_DIR), getConf());
  // open the readers, get the node, print out the info, and close the readers
  Text key = new Text(url);
  Node node = new Node();
  MapFileOutputFormat.getEntry(nodeReaders,
      new HashPartitioner<Text, Node>(), key, node);
  System.out.println(url + ":");
  System.out.println(" inlink score: " + node.getInlinkScore());
  System.out.println(" outlink score: " + node.getOutlinkScore());
  System.out.println(" num inlinks: " + node.getNumInlinks());
  System.out.println(" num outlinks: " + node.getNumOutlinks());
  FSUtils.closeReaders(nodeReaders);
}
Example 6: get

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

public CrawlDatum get(String crawlDb, String url, JobConf config)
    throws IOException {
  Text key = new Text(url);
  CrawlDatum val = new CrawlDatum();
  openReaders(crawlDb, config);
  CrawlDatum res = (CrawlDatum) MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, CrawlDatum>(), key, val);
  return res;
}
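openReaders is not part of this excerpt; in Nutch's crawl db reader it essentially opens one MapFile reader per part file of the crawl db before getEntry is called. A sketch of that idea, assuming the map files live in the crawl db's "current" subdirectory (the directory name and the lazy-initialization detail are assumptions for illustration):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.mapred.MapFileOutputFormat;

public class CrawlDbReaderSketch {
  private MapFile.Reader[] readers;

  /** Lazily open MapFile readers over the crawl db's part files (sketch). */
  void openReaders(String crawlDb, Configuration config) throws IOException {
    if (readers != null) {
      return;
    }
    FileSystem fs = FileSystem.get(config);
    readers = MapFileOutputFormat.getReaders(fs, new Path(crawlDb, "current"), config);
  }
}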
Example 7: main

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

public static void main(String[] args) throws Exception {
  if (args == null || args.length < 2) {
    System.out.println("LinkDumper$Reader usage: <webgraphdb> <url>");
    return;
  }
  // open the readers for the linkdump directory
  Configuration conf = NutchConfiguration.create();
  FileSystem fs = FileSystem.get(conf);
  Path webGraphDb = new Path(args[0]);
  String url = args[1];
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(
      webGraphDb, DUMP_DIR), conf);
  // get the link nodes for the url
  Text key = new Text(url);
  LinkNodes nodes = new LinkNodes();
  MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, LinkNodes>(), key, nodes);
  // print out the link nodes
  LinkNode[] linkNodesAr = nodes.getLinks();
  System.out.println(url + ":");
  for (LinkNode node : linkNodesAr) {
    System.out.println(" " + node.getUrl() + " - "
        + node.getNode().toString());
  }
  // close the readers
  FSUtils.closeReaders(readers);
}
Example 8: get

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

public CrawlDatum get(String crawlDb, String url, Configuration config) throws IOException {
  Text key = new Text(url);
  CrawlDatum val = new CrawlDatum();
  openReaders(crawlDb, config);
  CrawlDatum res = (CrawlDatum) MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, CrawlDatum>(), key, val);
  return res;
}
Example 9: main

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

public static void main(String[] args)
    throws Exception {
  if (args == null || args.length < 2) {
    System.out.println("LinkDumper$Reader usage: <webgraphdb> <url>");
    return;
  }
  // open the readers for the linkdump directory
  Configuration conf = NutchConfiguration.create();
  FileSystem fs = FileSystem.get(conf);
  Path webGraphDb = new Path(args[0]);
  String url = args[1];
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(
      webGraphDb, DUMP_DIR), conf);
  // get the link nodes for the url
  Text key = new Text(url);
  LinkNodes nodes = new LinkNodes();
  MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, LinkNodes>(), key, nodes);
  // print out the link nodes
  LinkNode[] linkNodesAr = nodes.getLinks();
  System.out.println(url + ":");
  for (LinkNode node : linkNodesAr) {
    System.out.println(" " + node.getUrl() + " - "
        + node.getNode().toString());
  }
  // close the readers
  FSUtils.closeReaders(readers);
}
Example 10: getJavaPartitioner

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

/**
 * Get the user's original partitioner.
 * @param conf the configuration to look in
 * @return the class that the user submitted
 */
static Class<? extends Partitioner> getJavaPartitioner(JobConf conf) {
  return conf.getClass(Submitter.PARTITIONER,
                       HashPartitioner.class,
                       Partitioner.class);
}
Example 11: getJavaPartitioner

import org.apache.hadoop.mapred.lib.HashPartitioner; // import for the class used below

/**
 * Get the user's original partitioner.
 * @param conf the configuration to look in
 * @return the class that the user submitted
 */
static Class<? extends Partitioner> getJavaPartitioner(JobConf conf) {
  return conf.getClass("hadoop.pipes.partitioner",
                       HashPartitioner.class,
                       Partitioner.class);
}