This article collects typical usage examples of the Java MapFile.Writer method from org.apache.hadoop.io.MapFile. If you are wondering how MapFile.Writer is used in practice, or what working code looks like, the selected examples below may help; you can also read further about the enclosing class, org.apache.hadoop.io.MapFile.
A total of 15 code examples of the MapFile.Writer method are shown below, sorted by popularity by default.
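Before the individual examples, here is a minimal, self-contained sketch of a write/read round trip using the Option-based MapFile API that several of the examples below rely on. The output directory and the key/value contents are made up for illustration and are not taken from any of the examples.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;

public class MapFileRoundTrip {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path dir = new Path("/tmp/mapfile-demo"); // hypothetical output directory
    // A MapFile is a directory holding a sorted "data" file plus an "index" file,
    // so keys must be appended in sorted order.
    MapFile.Writer writer = new MapFile.Writer(conf, dir,
        MapFile.Writer.keyClass(Text.class),
        MapFile.Writer.valueClass(Text.class));
    for (int i = 0; i < 10; i++) {
      writer.append(new Text(String.format("key-%03d", i)), new Text("value-" + i));
    }
    writer.close();

    // Random access by key goes through the index.
    MapFile.Reader reader = new MapFile.Reader(dir, conf);
    Text value = new Text();
    reader.get(new Text("key-005"), value);
    System.out.println(value); // prints "value-5"
    reader.close();
  }
}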
Example 1: createCrawlDb
import org.apache.hadoop.io.MapFile; // import the package/class the method depends on
private void createCrawlDb(Configuration config, FileSystem fs, Path crawldb,
    TreeSet<String> init, CrawlDatum cd) throws Exception {
  LOG.fine("* creating crawldb: " + crawldb);
  Path dir = new Path(crawldb, CrawlDb.CURRENT_NAME);
  Option wKeyOpt = MapFile.Writer.keyClass(Text.class);
  org.apache.hadoop.io.SequenceFile.Writer.Option wValueOpt = SequenceFile.Writer.valueClass(CrawlDatum.class);
  MapFile.Writer writer = new MapFile.Writer(config, new Path(dir,
      "part-00000"), wKeyOpt, wValueOpt);
  Iterator<String> it = init.iterator();
  while (it.hasNext()) {
    String key = it.next();
    writer.append(new Text(key), cd);
  }
  writer.close();
}
Example 2: createCrawlDb
import org.apache.hadoop.io.MapFile; // import the package/class the method depends on
/**
 * Creates synthetic crawldb
 *
 * @param fs
 *          filesystem where db will be created
 * @param crawldb
 *          path where db will be created
 * @param init
 *          urls to be inserted, objects are of type URLCrawlDatum
 * @throws Exception
 */
public static void createCrawlDb(Configuration conf, FileSystem fs,
    Path crawldb, List<URLCrawlDatum> init) throws Exception {
  LOG.trace("* creating crawldb: " + crawldb);
  Path dir = new Path(crawldb, CrawlDb.CURRENT_NAME);
  Option wKeyOpt = MapFile.Writer.keyClass(Text.class);
  org.apache.hadoop.io.SequenceFile.Writer.Option wValueOpt = SequenceFile.Writer.valueClass(CrawlDatum.class);
  MapFile.Writer writer = new MapFile.Writer(conf, new Path(dir,
      "part-00000"), wKeyOpt, wValueOpt);
  Iterator<URLCrawlDatum> it = init.iterator();
  while (it.hasNext()) {
    URLCrawlDatum row = it.next();
    LOG.info("adding:" + row.url.toString());
    writer.append(new Text(row.url), row.datum);
  }
  writer.close();
}
Example 3: createLinkDb
import org.apache.hadoop.io.MapFile; // import the package/class the method depends on
private void createLinkDb(Configuration config, FileSystem fs, Path linkdb,
    TreeMap<String, String[]> init) throws Exception {
  LOG.fine("* creating linkdb: " + linkdb);
  Path dir = new Path(linkdb, LinkDb.CURRENT_NAME);
  Option wKeyOpt = MapFile.Writer.keyClass(Text.class);
  org.apache.hadoop.io.SequenceFile.Writer.Option wValueOpt = SequenceFile.Writer.valueClass(Inlinks.class);
  MapFile.Writer writer = new MapFile.Writer(config, new Path(dir,
      "part-00000"), wKeyOpt, wValueOpt);
  Iterator<String> it = init.keySet().iterator();
  while (it.hasNext()) {
    String key = it.next();
    Inlinks inlinks = new Inlinks();
    String[] vals = init.get(key);
    for (int i = 0; i < vals.length; i++) {
      Inlink in = new Inlink(vals[i], vals[i]);
      inlinks.add(in);
    }
    writer.append(new Text(key), inlinks);
  }
  writer.close();
}
Example 4: createMapFile
import org.apache.hadoop.io.MapFile; // import the package/class the method depends on
private static void createMapFile(Configuration conf, FileSystem fs, Path path,
    CompressionCodec codec, CompressionType type, int records) throws IOException {
  MapFile.Writer writer =
      new MapFile.Writer(conf, path,
          MapFile.Writer.keyClass(Text.class),
          MapFile.Writer.valueClass(Text.class),
          MapFile.Writer.compression(type, codec));
  Text key = new Text();
  for (int j = 0; j < records; j++) {
    key.set(String.format("%03d", j));
    writer.append(key, key);
  }
  writer.close();
}
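Since this catalog only shows writers, here is a hedged sketch of a counterpart that could read back and scan a MapFile written as in Example 4. The helper method readMapFile is hypothetical; it assumes the path argument points at the directory passed to createMapFile above.

// Hypothetical counterpart to createMapFile above: scan all records back in key order.
private static void readMapFile(Configuration conf, Path path) throws IOException {
  MapFile.Reader reader = new MapFile.Reader(path, conf);
  try {
    Text key = new Text();
    Text value = new Text();
    while (reader.next(key, value)) { // iterates the sorted data file
      System.out.println(key + " -> " + value);
    }
  } finally {
    reader.close();
  }
}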
Example 5: getRecordWriter
import org.apache.hadoop.io.MapFile; // import the package/class the method depends on
public RecordWriter<WritableComparable, Writable> getRecordWriter(FileSystem ignored, JobConf job,
    String name, Progressable progress)
    throws IOException {
  // get the path of the temporary output file
  Path file = FileOutputFormat.getTaskOutputPath(job, name);
  FileSystem fs = file.getFileSystem(job);
  CompressionCodec codec = null;
  CompressionType compressionType = CompressionType.NONE;
  if (getCompressOutput(job)) {
    // find the kind of compression to do
    compressionType = SequenceFileOutputFormat.getOutputCompressionType(job);
    // find the right codec
    Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job,
        DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
  }
  // ignore the progress parameter, since MapFile is local
  final MapFile.Writer out =
      new MapFile.Writer(job, fs, file.toString(),
          job.getOutputKeyClass().asSubclass(WritableComparable.class),
          job.getOutputValueClass().asSubclass(Writable.class),
          compressionType, codec,
          progress);
  return new RecordWriter<WritableComparable, Writable>() {
    public void write(WritableComparable key, Writable value)
        throws IOException {
      out.append(key, value);
    }
    public void close(Reporter reporter) throws IOException { out.close(); }
  };
}
Example 6: close
import org.apache.hadoop.io.MapFile; // import the package/class the method depends on
public void close() {
  try {
    for (MapFile.Writer w : writers) {
      w.close();
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  } finally {
    isClosed.set(true);
  }
}
Example 7: prepareWriter
import org.apache.hadoop.io.MapFile; // import the package/class the method depends on
private void prepareWriter() throws IOException {
  if (reader != null) {
    reader.close();
    reader = null;
  }
  if (writer == null) {
    writer = new MapFile.Writer(conf, fs, qualifiedDirName.toString(), Text.class, Text.class);
  }
}
Example 8: setUp
import org.apache.hadoop.io.MapFile; // import the package/class the method depends on
public void setUp() throws Exception {
  conf = NutchConfiguration.create();
  fs = FileSystem.get(conf);
  long blkSize = fs.getDefaultBlockSize();
  testDir = new Path(conf.get("hadoop.tmp.dir"), "merge-" + System.currentTimeMillis());
  seg1 = new Path(testDir, "seg1");
  seg2 = new Path(testDir, "seg2");
  out = new Path(testDir, "out");
  // create large parse-text segments
  System.err.println("Creating large segment 1...");
  DecimalFormat df = new DecimalFormat("0000000");
  Text k = new Text();
  Path ptPath = new Path(new Path(seg1, ParseText.DIR_NAME), "part-00000");
  MapFile.Writer w = new MapFile.Writer(conf, fs, ptPath.toString(), Text.class, ParseText.class);
  long curSize = 0;
  countSeg1 = 0;
  while (curSize < blkSize * 2) {
    k.set("seg1-" + df.format(countSeg1));
    w.append(k, new ParseText("seg1 text " + countSeg1));
    countSeg1++;
    curSize += 40; // roughly ...
  }
  w.close();
  System.err.println(" - done: " + countSeg1 + " records.");
  System.err.println("Creating large segment 2...");
  ptPath = new Path(new Path(seg2, ParseText.DIR_NAME), "part-00000");
  w = new MapFile.Writer(conf, fs, ptPath.toString(), Text.class, ParseText.class);
  curSize = 0;
  countSeg2 = 0;
  while (curSize < blkSize * 2) {
    k.set("seg2-" + df.format(countSeg2));
    w.append(k, new ParseText("seg2 text " + countSeg2));
    countSeg2++;
    curSize += 40; // roughly ...
  }
  w.close();
  System.err.println(" - done: " + countSeg2 + " records.");
}
Example 9: createCrawlDb
import org.apache.hadoop.io.MapFile; // import the package/class the method depends on
private void createCrawlDb(Configuration config, FileSystem fs, Path crawldb, TreeSet init, CrawlDatum cd) throws Exception {
  LOG.fine("* creating crawldb: " + crawldb);
  Path dir = new Path(crawldb, CrawlDb.CURRENT_NAME);
  MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir, "part-00000").toString(), Text.class, CrawlDatum.class);
  Iterator it = init.iterator();
  while (it.hasNext()) {
    String key = (String)it.next();
    writer.append(new Text(key), cd);
  }
  writer.close();
}
Example 10: createCrawlDb
import org.apache.hadoop.io.MapFile; // import the package/class the method depends on
/**
 * Creates synthetic crawldb
 *
 * @param fs
 *          filesystem where db will be created
 * @param crawldb
 *          path where db will be created
 * @param init
 *          urls to be inserted, objects are of type URLCrawlDatum
 * @throws Exception
 */
public static void createCrawlDb(Configuration conf, FileSystem fs, Path crawldb, List<URLCrawlDatum> init)
    throws Exception {
  LOG.trace("* creating crawldb: " + crawldb);
  Path dir = new Path(crawldb, CrawlDb.CURRENT_NAME);
  MapFile.Writer writer = new MapFile.Writer(conf, fs, new Path(dir, "part-00000")
      .toString(), Text.class, CrawlDatum.class);
  Iterator<URLCrawlDatum> it = init.iterator();
  while (it.hasNext()) {
    URLCrawlDatum row = it.next();
    LOG.info("adding:" + row.url.toString());
    writer.append(new Text(row.url), row.datum);
  }
  writer.close();
}
Example 11: startWrite
import org.apache.hadoop.io.MapFile; // import the package/class the method depends on
public synchronized void startWrite() throws IOException {
  assertNotWrite("Tried to restart write");
  cleanupRead();
  m_writer = new MapFile.Writer(m_config, m_fs, m_dirStr, Text.class, BytesWritable.class);
  m_writer.setIndexInterval(1);
  m_fs.mkdirs(m_metaPath);
  m_mode = Mode.WRITE;
}
Example 12: saveLSMapFile
import org.apache.hadoop.io.MapFile; // import the package/class the method depends on
public static void saveLSMapFile(LocalStructure[][] inputls, Configuration conf) {
  String name = conf.get(Util.MAPFILENAMEPROPERTY, Util.MAPFILEDEFAULTNAME);
  MapFile.Writer lsmapfile = Util.createMapFileWriter(conf, name, Text.class, inputls[0][0].newArrayWritable().getClass());
  Arrays.sort(inputls, new Comparator<LocalStructure[]>() {
    public int compare(LocalStructure [] als1, LocalStructure [] als2) {
      return als1[0].getFpid().compareTo(als2[0].getFpid());
    }
  });
  Text fpid = new Text();
  ArrayWritable aw = null;
  for(LocalStructure [] ails : inputls) {
    fpid.set(ails[0].getFpid());
    ails = Util.removeNonValidLS(ails);
    try {
      aw = ails[0].newArrayWritable(ails);
      lsmapfile.append(fpid, aw);
    } catch (IOException e) {
      System.err.println("LocalStructure.saveLSMapFile: unable to save fingerprint "
          + fpid.toString() + " in MapFile " + name + ": " + e.getMessage());
      e.printStackTrace();
    }
  }
  IOUtils.closeStream(lsmapfile);
}
Example 13: saveLSMapFile
import org.apache.hadoop.io.MapFile; // import the package/class the method depends on
public static void saveLSMapFile(LocalStructure[][] inputls, Configuration conf) {
  String name = conf.get(Util.MAPFILENAMEPROPERTY, Util.MAPFILEDEFAULTNAME);
  MapFile.Writer lsmapfile = Util.createMapFileWriter(conf, name, Text.class, inputls[0][0].newArrayWritable().getClass());
  Arrays.sort(inputls, new Comparator<LocalStructure[]>() {
    public int compare(LocalStructure [] als1, LocalStructure [] als2) {
      return als1[0].getFpid().compareTo(als2[0].getFpid());
    }
  });
  Text fpid = new Text();
  ArrayWritable aw = null;
  for(LocalStructure [] ails : inputls) {
    fpid.set(ails[0].getFpid());
    try {
      aw = ails[0].newArrayWritable(ails);
      lsmapfile.append(fpid, aw);
    } catch (IOException e) {
      System.err.println("LocalStructureCylinder.saveLSMapFile: unable to save fingerprint "
          + fpid.toString() + " in MapFile " + name + ": " + e.getMessage());
      e.printStackTrace();
    }
  }
  IOUtils.closeStream(lsmapfile);
}
Example 14: saveInfoFile
import org.apache.hadoop.io.MapFile; // import the package/class the method depends on
public void saveInfoFile(LocalStructure[][] inputls, Configuration conf) {
  String name = conf.get(Util.INFOFILENAMEPROPERTY, Util.INFOFILEDEFAULTNAME);
  MapFile.Writer lsmapfile = Util.createMapFileWriter(conf, name, Text.class, MinutiaArray.class);
  Arrays.sort(inputls, new Comparator<LocalStructure[]>() {
    public int compare(LocalStructure [] als1, LocalStructure [] als2) {
      return als1[0].getFpid().compareTo(als2[0].getFpid());
    }
  });
  Text fpid = new Text();
  for(LocalStructure [] ails : inputls) {
    fpid.set(ails[0].getFpid());
    Minutia [] ma = new Minutia[ails.length];
    try {
      for(int i = 0; i < ails.length; i++)
        ma[i] = ((LocalStructureCylinder) ails[i]).getMinutia();
      lsmapfile.append(fpid, new MinutiaArray(ma));
    } catch (IOException e) {
      System.err.println("LocalStructure.saveLSMapFile: unable to save fingerprint "
          + fpid.toString() + " in MapFile " + name + ": " + e.getMessage());
      e.printStackTrace();
    }
  }
  IOUtils.closeStream(lsmapfile);
  return;
}
Example 15: setUp
import org.apache.hadoop.io.MapFile; // import the package/class the method depends on
@Override
void setUp() throws Exception {
  writer = new MapFile.Writer(conf, fs, mf.toString(),
      ImmutableBytesWritable.class, ImmutableBytesWritable.class);
}