This article collects typical usage examples of the Java class org.apache.hadoop.util.bloom.BloomFilter. If you are wondering what the BloomFilter class does or how to use it, the curated examples below may help.
The BloomFilter class belongs to the org.apache.hadoop.util.bloom package. 15 code examples are shown below, ordered by popularity by default; upvoting the examples you find useful helps the site recommend better Java code examples.
Example 1: setup
import org.apache.hadoop.util.bloom.BloomFilter; // import the required package/class
@Override
protected void setup(Context context) throws IOException,
        InterruptedException {
    // TODO Create a FileSystem object
    FileSystem fs = FileSystem.get(context.getConfiguration());

    // TODO get the cache files from the context
    URI[] uris = context.getCacheFiles();

    if (uris.length > 0) {
        // TODO create a new Bloom filter
        filter = new BloomFilter();

        // TODO call the filter's readFields method, passing in an FSDataInputStream
        filter.readFields(fs.open(new Path(uris[0].toString())));
    } else {
        throw new IOException(
                "Bloom filter file not in DistributedCache");
    }
}
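This setup() method expects the serialized Bloom filter to already be available through the distributed cache. A minimal sketch of the driver side that would make this work, assuming a hypothetical HDFS path /tmp/bloom/filter.bin for the serialized filter (the path and class name are placeholders, not from the original example):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class BloomFilterDriverSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "bloom-filter-join");

        // Register the serialized Bloom filter with the distributed cache so the
        // mapper's setup() can retrieve it via context.getCacheFiles().
        job.addCacheFile(new URI("/tmp/bloom/filter.bin"));

        // ... remaining job configuration (mapper class, input/output paths) omitted
    }
}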
Example 2: exec
import org.apache.hadoop.util.bloom.BloomFilter; // import the required package/class
@Override
public Tuple exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) return null;

    // Strip off the initial level of bag
    DataBag values = (DataBag) input.get(0);
    Iterator<Tuple> it = values.iterator();
    Tuple t = it.next();

    // If the input tuple has only one field, then we'll extract
    // that field and serialize it into a key. If it has multiple
    // fields, we'll serialize the whole tuple.
    byte[] b;
    if (t.size() == 1) b = DataType.toBytes(t.get(0));
    else b = DataType.toBytes(t, DataType.TUPLE);

    Key k = new Key(b);
    filter = new BloomFilter(vSize, numHash, hType);
    filter.add(k);
    return TupleFactory.getInstance().newTuple(bloomOut());
}
Example 3: init
import org.apache.hadoop.util.bloom.BloomFilter; // import the required package/class
private void init() throws IOException {
    filter = new BloomFilter();
    String dir = "./" + getFilenameFromPath(bloomFile);
    String[] partFiles = new File(dir)
            .list(new FilenameFilter() {
                @Override
                public boolean accept(File current, String name) {
                    return name.startsWith("part");
                }
            });
    String dcFile = dir + "/" + partFiles[0];
    DataInputStream dis = new DataInputStream(new FileInputStream(dcFile));
    try {
        filter.readFields(dis);
    } finally {
        dis.close();
    }
}
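The getFilenameFromPath helper used above is not shown in this snippet. A minimal sketch of what it presumably does (return the last component of the path so the distributed-cache symlink directory can be located) might be:

// Hypothetical helper, not part of the original snippet: returns the last
// component of a path string, e.g. "/user/foo/bloom" -> "bloom".
private static String getFilenameFromPath(String path) {
    int idx = path.lastIndexOf('/');
    return idx < 0 ? path : path.substring(idx + 1);
}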
Example 4: readFromAvro
import org.apache.hadoop.util.bloom.BloomFilter; // import the required package/class
public static BloomFilter readFromAvro(InputStream is) throws IOException {
    DataFileStream<Object> reader =
            new DataFileStream<Object>(
                    is, new GenericDatumReader<Object>());
    reader.hasNext();
    BloomFilter filter = new BloomFilter();
    AvroBytesRecord
            .fromGenericRecord((GenericRecord) reader.next(), filter);
    IOUtils.closeQuietly(is);
    IOUtils.closeQuietly(reader);
    return filter;
}
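BloomFilter implements Hadoop's Writable interface, so independently of the Avro container used above, a filter can also be round-tripped with plain write()/readFields(). A minimal, self-contained sketch (the key "alice" and the sizing parameters are illustrative only):

import java.io.*;
import org.apache.hadoop.util.bloom.BloomFilter;
import org.apache.hadoop.util.bloom.Key;
import org.apache.hadoop.util.hash.Hash;

public class BloomWritableRoundTrip {
    public static void main(String[] args) throws IOException {
        BloomFilter filter = new BloomFilter(1000, 5, Hash.MURMUR_HASH);
        filter.add(new Key("alice".getBytes()));

        // Serialize using the Writable contract...
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        filter.write(new DataOutputStream(bos));

        // ...and read it back into a fresh instance.
        BloomFilter copy = new BloomFilter();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));

        System.out.println(copy.membershipTest(new Key("alice".getBytes()))); // true
    }
}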
Example 5: run
import org.apache.hadoop.util.bloom.BloomFilter; // import the required package/class
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();

    if (result != 0) {
        return result;
    }

    Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

    Configuration conf = super.getConf();

    JobConf job = new JobConf(conf);
    job.setJarByClass(BloomFilterCreator.class);

    job.set(AvroJob.OUTPUT_SCHEMA, AvroBytesRecord.SCHEMA.toString());
    job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());

    job.setInputFormat(KeyValueTextInputFormat.class);
    job.setOutputFormat(AvroOutputFormat.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(BloomFilter.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(BloomFilter.class);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    return JobClient.runJob(job).isSuccessful() ? 0 : 1;
}
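Since run(String[]) follows Hadoop's Tool contract (note the call to super.getConf()), the driver is typically launched through ToolRunner. A minimal launcher sketch, assuming BloomFilterCreator extends Configured and implements Tool (which this snippet does not show):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class BloomFilterCreatorLauncher {
    public static void main(String[] args) throws Exception {
        // ToolRunner parses generic Hadoop options (-D, -files, ...) before
        // delegating the remaining arguments to run().
        int exitCode = ToolRunner.run(new Configuration(), new BloomFilterCreator(), args);
        System.exit(exitCode);
    }
}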
Example 6: map
import org.apache.hadoop.util.bloom.BloomFilter; // import the required package/class
@Override
public void map(Text key, Text value,
                OutputCollector<NullWritable, BloomFilter> output,
                Reporter reporter) throws IOException {
    System.out.println("K[" + key + "]");

    int age = Integer.valueOf(value.toString());
    if (age > 30) {
        filter.add(new Key(key.toString().getBytes()));
    }
    collector = output;
}
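The mapper above only caches the OutputCollector; nothing is emitted per record. The aggregated filter is presumably written out once per task in close(). A sketch of such a close() override for the same mapper class (an assumption, not shown in the original snippet):

// Hypothetical close() for the mapper above: emits the single Bloom filter
// accumulated across all map() calls, so each map task produces exactly one record.
@Override
public void close() throws IOException {
    if (collector != null) {
        collector.collect(NullWritable.get(), filter);
    }
}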
Example 7: reduce
import org.apache.hadoop.util.bloom.BloomFilter; // import the required package/class
@Override
public void reduce(NullWritable key, Iterator<BloomFilter> values,
                   OutputCollector<AvroWrapper<GenericRecord>, NullWritable> output,
                   Reporter reporter) throws IOException {
    while (values.hasNext()) {
        BloomFilter bf = values.next();
        filter.or(bf);
        System.out.println(filter);
    }
    collector = output;
}
Example 8: createBloomFilter
import org.apache.hadoop.util.bloom.BloomFilter; // import the required package/class
public BloomFilter createBloomFilter(int numMembers, float falsePosRate) {
    // TODO calculate the optimal Bloom filter size
    // TODO and the optimal number of hash functions
    int vectorSize = getOptimalBloomFilterSize(numMembers, falsePosRate);
    int nbHash = getOptimalK(numMembers, vectorSize);

    // TODO create new Bloom filter
    BloomFilter filter = new BloomFilter(vectorSize, nbHash,
            Hash.MURMUR_HASH);
    return filter;
}
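The helpers getOptimalBloomFilterSize and getOptimalK are not shown here. A sketch based on the standard Bloom filter formulas m = -n * ln(p) / (ln 2)^2 and k = (m / n) * ln 2, which is an assumption about their intended implementation:

// Sketches of the two helpers, assuming they implement the textbook formulas.
// numMembers = expected number of elements (n), falsePosRate = target false-positive rate (p).
public static int getOptimalBloomFilterSize(int numMembers, float falsePosRate) {
    // m = -n * ln(p) / (ln 2)^2
    return (int) Math.ceil(-numMembers * Math.log(falsePosRate)
            / (Math.log(2) * Math.log(2)));
}

// vectorSize = the bit-vector size (m) returned by the method above.
public static int getOptimalK(float numMembers, float vectorSize) {
    // k = (m / n) * ln 2
    return (int) Math.round(vectorSize / numMembers * Math.log(2));
}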
Example 9: createBloomFilter (exercise stub)
import org.apache.hadoop.util.bloom.BloomFilter; // import the required package/class
public BloomFilter createBloomFilter(int numMembers, float falsePosRate) {
    // TODO calculate the optimal Bloom filter size
    // TODO and the optimal number of hash functions
    // TODO create new Bloom filter
    return null;
}
Example 10: ElementIteratorReadIntoMemory
import org.apache.hadoop.util.bloom.BloomFilter; // import the required package/class
ElementIteratorReadIntoMemory() throws RetrieverException {
    vertices = extractVertices(seedsIter);

    // Create Bloom filter, read through set of entities and add them to
    // Bloom filter
    final BloomFilter filter = BloomFilterUtils.getBloomFilter(store.getProperties().getFalsePositiveRate(),
            vertices.size(), store.getProperties().getMaxBloomFilterToPassToAnIterator());
    addToBloomFilter(vertices, filter);

    initialise(filter);
}
Example 11: ElementIteratorReadIntoMemory
import org.apache.hadoop.util.bloom.BloomFilter; // import the required package/class
ElementIteratorReadIntoMemory() throws RetrieverException {
    verticesA = extractVertices(seedSetAIter);
    verticesB = extractVertices(seedSetBIter);

    // Create Bloom filter, read through set of entities B and add them
    // to Bloom filter
    final BloomFilter filter = BloomFilterUtils.getBloomFilter(store.getProperties().getFalsePositiveRate(),
            verticesB.size(), store.getProperties().getMaxBloomFilterToPassToAnIterator());
    addToBloomFilter(verticesB, filter);

    initialise(filter);
}
Example 12: addToBloomFilter
import org.apache.hadoop.util.bloom.BloomFilter; // import the required package/class
protected void addToBloomFilter(final Iterator<? extends Object> vertices, final BloomFilter filter)
        throws RetrieverException {
    try {
        while (vertices.hasNext()) {
            addToBloomFilter(vertices.next(), filter);
        }
    } finally {
        CloseableUtil.close(vertices);
    }
}
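The single-element overload addToBloomFilter(Object, BloomFilter) called inside the loop is not shown. A minimal sketch, assuming a hypothetical serialiseVertex() helper that turns a vertex into bytes (the actual store uses its own serialisation, which this sketch does not reproduce):

// Hypothetical overload, not from the original snippet: serialiseVertex() is a
// placeholder for whatever vertex serialisation the store actually uses.
protected void addToBloomFilter(final Object vertex, final BloomFilter filter)
        throws RetrieverException {
    // new Key(byte[]) is the standard Hadoop API for inserting an element.
    filter.add(new Key(serialiseVertex(vertex)));
}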
Example 13: setFilter
import org.apache.hadoop.util.bloom.BloomFilter; // import the required package/class
/**
 * For testing only, do not use directly.
 */
public void setFilter(DataByteArray dba) throws IOException {
    DataInputStream dis = new DataInputStream(new
            ByteArrayInputStream(dba.get()));
    filter = new BloomFilter();
    filter.readFields(dis);
}
Example 14: bloomOr
import org.apache.hadoop.util.bloom.BloomFilter; // import the required package/class
protected DataByteArray bloomOr(Tuple input) throws IOException {
    filter = new BloomFilter(vSize, numHash, hType);

    try {
        DataBag values = (DataBag) input.get(0);
        for (Iterator<Tuple> it = values.iterator(); it.hasNext();) {
            Tuple t = it.next();
            filter.or(bloomIn((DataByteArray) t.get(0)));
        }
    } catch (ExecException ee) {
        throw new IOException(ee);
    }

    return bloomOut();
}
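The bloomOut() method referenced in Examples 2 and 14 is the counterpart to bloomIn() (Example 15 below): it serializes the UDF's current filter field back into a Pig DataByteArray. A minimal sketch for the same UDF class, assuming only the filter field already used in these snippets:

// Sketch of bloomOut(): writes the filter's Writable representation into a
// DataByteArray so it can travel through Pig as a regular byte array value.
protected DataByteArray bloomOut() throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);
    filter.write(dos);
    dos.flush();
    return new DataByteArray(baos.toByteArray());
}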
Example 15: bloomIn
import org.apache.hadoop.util.bloom.BloomFilter; // import the required package/class
protected BloomFilter bloomIn(DataByteArray b) throws IOException {
    DataInputStream dis = new DataInputStream(new
            ByteArrayInputStream(b.get()));
    BloomFilter f = new BloomFilter();
    f.readFields(dis);
    return f;
}