当前位置: 首页>>代码示例>>Java>>正文


Java BloomFilter类代码示例

本文整理汇总了Java中org.apache.hadoop.util.bloom.BloomFilter的典型用法代码示例。如果您正苦于以下问题:Java BloomFilter类的具体用法?Java BloomFilter怎么用?Java BloomFilter使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


BloomFilter类属于org.apache.hadoop.util.bloom包,在下文中一共展示了BloomFilter类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: setup

import org.apache.hadoop.util.bloom.BloomFilter; //导入依赖的package包/类
/**
 * Loads the Bloom filter from the first cache file registered with the job.
 *
 * @param context the task context supplying the configuration and the cache file URIs
 * @throws IOException if no cache file is registered or the filter cannot be read
 * @throws InterruptedException declared by the overridden method; not thrown here directly
 */
@Override
protected void setup(Context context) throws IOException,
		InterruptedException {

	FileSystem fs = FileSystem.get(context.getConfiguration());

	// NOTE(review): getCacheFiles() can return null when no cache file was
	// registered, so null is treated the same as an empty array.
	URI[] uris = context.getCacheFiles();
	if (uris == null || uris.length == 0) {
		throw new IOException(
				"Bloom filter file not in DistributedCache");
	}

	filter = new BloomFilter();

	// Only the first cache file is read. The stream is closed even when
	// deserialization fails so the file handle is not leaked.
	java.io.DataInputStream in = fs.open(new Path(uris[0].toString()));
	try {
		filter.readFields(in);
	} finally {
		in.close();
	}
}
 
开发者ID:adamjshook,项目名称:bloomfilter-course,代码行数:22,代码来源:MRBloomFilter.java

示例2: exec

import org.apache.hadoop.util.bloom.BloomFilter; //导入依赖的package包/类
/**
 * Builds a Bloom filter holding one key derived from the first tuple of the
 * bag found in field 0 of {@code input}, and returns it wrapped in a tuple.
 *
 * @param input tuple whose first field is a bag of tuples
 * @return a tuple wrapping the result of {@code bloomOut()}, or {@code null}
 *         when {@code input} is null or has no fields
 * @throws IOException if reading or serializing the tuple fails
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }

    // Field 0 is the enclosing bag; only its first tuple is consumed.
    final DataBag bag = (DataBag) input.get(0);
    final Tuple first = bag.iterator().next();

    // A single-field tuple is keyed on that field alone; a wider tuple is
    // serialized as a whole.
    final byte[] keyBytes = (first.size() == 1)
            ? DataType.toBytes(first.get(0))
            : DataType.toBytes(first, DataType.TUPLE);

    final Key key = new Key(keyBytes);
    filter = new BloomFilter(vSize, numHash, hType);
    filter.add(key);

    return TupleFactory.getInstance().newTuple(bloomOut());
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:23,代码来源:BuildBloom.java

示例3: init

import org.apache.hadoop.util.bloom.BloomFilter; //导入依赖的package包/类
/**
 * Loads the Bloom filter from a "part" file inside the local directory named
 * after {@code bloomFile}.
 *
 * @throws IOException if the directory cannot be listed, holds no file whose
 *                     name starts with "part", or the filter cannot be read
 */
private void init() throws IOException {
    filter = new BloomFilter();
    String dir = "./" + getFilenameFromPath(bloomFile);
    String[] partFiles = new File(dir)
            .list(new FilenameFilter() {
                @Override
                public boolean accept(File current, String name) {
                    return name.startsWith("part");
                }
            });

    // File.list returns null when the path is not a directory or an I/O error
    // occurs; fail with a descriptive error instead of an NPE or an
    // ArrayIndexOutOfBoundsException on partFiles[0].
    if (partFiles == null || partFiles.length == 0) {
        throw new IOException(
                "No part file found for Bloom filter in directory " + dir);
    }

    // The first entry returned by the listing is used.
    String dcFile = dir + "/" + partFiles[0];
    DataInputStream dis = new DataInputStream(new FileInputStream(dcFile));
    try {
        filter.readFields(dis);
    } finally {
        dis.close();
    }
}
 
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:20,代码来源:Bloom.java

示例4: readFromAvro

import org.apache.hadoop.util.bloom.BloomFilter; //导入依赖的package包/类
/**
 * Reads a single Bloom filter from the first record of an Avro data stream.
 * Both the Avro reader and the supplied stream are closed before returning,
 * whether or not reading succeeds.
 *
 * @param is the stream containing the Avro data file; closed by this method
 * @return the Bloom filter populated from the first record
 * @throws IOException if the stream cannot be read or contains no record
 */
public static BloomFilter readFromAvro(InputStream is) throws IOException {
  DataFileStream<Object> reader = null;
  try {
    reader = new DataFileStream<Object>(
        is, new GenericDatumReader<Object>());

    // Fail with a clear message instead of letting next() throw on an empty file.
    if (!reader.hasNext()) {
      throw new IOException("Avro stream contains no Bloom filter record");
    }

    BloomFilter filter = new BloomFilter();
    AvroBytesRecord
        .fromGenericRecord((GenericRecord) reader.next(), filter);
    return filter;
  } finally {
    // Close in finally so neither stream leaks when reading fails.
    IOUtils.closeQuietly(is);
    if (reader != null) {
      IOUtils.closeQuietly(reader);
    }
  }
}
 
开发者ID:Hanmourang,项目名称:hiped2,代码行数:15,代码来源:BloomFilterDumper.java

示例5: run

import org.apache.hadoop.util.bloom.BloomFilter; //导入依赖的package包/类
/**
 * Parses the command line, configures the Bloom filter creation job and runs
 * it to completion.
 *
 * @param args the command-line arguments
 * @return the non-zero status from command-line parsing if it fails;
 *         otherwise 0 when the job succeeds and 1 when it does not
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  final Cli cli = Cli.builder()
      .setArgs(args)
      .addOptions(CliCommonOpts.MrIoOpts.values())
      .build();

  final int cliStatus = cli.runCmd();
  if (cliStatus != 0) {
    return cliStatus;
  }

  final Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  final Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  final JobConf job = new JobConf(super.getConf());
  job.setJarByClass(BloomFilterCreator.class);

  // Input and output formats.
  job.setInputFormat(KeyValueTextInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);

  // Avro output settings: record schema and codec class name.
  job.set(AvroJob.OUTPUT_SCHEMA, AvroBytesRecord.SCHEMA.toString());
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());

  // Map and reduce stages.
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  // Key/value types for the intermediate and final output.
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(BloomFilter.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(BloomFilter.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  final boolean succeeded = JobClient.runJob(job).isSuccessful();
  return succeeded ? 0 : 1;
}
 
开发者ID:Hanmourang,项目名称:hiped2,代码行数:45,代码来源:BloomFilterCreator.java

示例6: map

import org.apache.hadoop.util.bloom.BloomFilter; //导入依赖的package包/类
/**
 * Adds the record key to the Bloom filter when the value, parsed as an
 * integer, is greater than 30, and stores the output collector in the
 * {@code collector} field.
 *
 * @param key      the record key; its string form is what gets added to the filter
 * @param value    text holding an integer
 * @param output   the collector saved into the {@code collector} field
 * @param reporter unused here
 * @throws IOException declared by the overridden method
 */
@Override
public void map(Text key, Text value,
                OutputCollector<NullWritable, BloomFilter> output,
                Reporter reporter) throws IOException {

  System.out.println("K[" + key + "]");

  final int age = Integer.parseInt(value.toString());
  final boolean olderThanThirty = age > 30;
  if (olderThanThirty) {
    // NOTE(review): getBytes() uses the platform default charset; whoever
    // tests membership must encode keys the same way - confirm.
    final byte[] keyBytes = key.toString().getBytes();
    filter.add(new Key(keyBytes));
  }

  collector = output;
}
 
开发者ID:Hanmourang,项目名称:hiped2,代码行数:14,代码来源:BloomFilterCreator.java

示例7: reduce

import org.apache.hadoop.util.bloom.BloomFilter; //导入依赖的package包/类
/**
 * ORs every incoming Bloom filter into the {@code filter} field, printing the
 * accumulated filter after each merge, then stores the output collector in
 * the {@code collector} field.
 *
 * @param key      unused here
 * @param values   the Bloom filters to merge
 * @param output   the collector saved into the {@code collector} field
 * @param reporter unused here
 * @throws IOException declared by the overridden method
 */
@Override
public void reduce(NullWritable key, Iterator<BloomFilter> values,
                   OutputCollector<AvroWrapper<GenericRecord>,
                       NullWritable> output,
                   Reporter reporter) throws IOException {
  while (values.hasNext()) {
    filter.or(values.next());
    System.out.println(filter);
  }

  collector = output;
}
 
开发者ID:Hanmourang,项目名称:hiped2,代码行数:13,代码来源:BloomFilterCreator.java

示例8: createBloomFilter

import org.apache.hadoop.util.bloom.BloomFilter; //导入依赖的package包/类
/**
 * Creates an empty Bloom filter whose vector size and hash count are obtained
 * from {@code getOptimalBloomFilterSize} and {@code getOptimalK}.
 *
 * @param numMembers   passed to both sizing helpers
 * @param falsePosRate passed to {@code getOptimalBloomFilterSize}
 * @return a new Bloom filter using {@code Hash.MURMUR_HASH}
 */
public BloomFilter createBloomFilter(int numMembers, float falsePosRate) {
	// The vector size is computed first because the hash count depends on it.
	final int bitCount = getOptimalBloomFilterSize(numMembers, falsePosRate);
	final int hashCount = getOptimalK(numMembers, bitCount);

	return new BloomFilter(bitCount, hashCount, Hash.MURMUR_HASH);
}
 
开发者ID:adamjshook,项目名称:bloomfilter-course,代码行数:13,代码来源:Trainer.java

示例9: createBloomFilter

import org.apache.hadoop.util.bloom.BloomFilter; //导入依赖的package包/类
/**
 * Unimplemented stub: the body contains only TODO markers and always returns
 * {@code null}, ignoring both arguments.
 *
 * @param numMembers   currently unused
 * @param falsePosRate currently unused
 * @return always {@code null} until the TODO steps below are implemented
 */
public BloomFilter createBloomFilter(int numMembers, float falsePosRate) {
	// TODO calculate the optimal Bloom filter size
	// TODO and the optimal number of hash functions

	// TODO create new Bloom filter
	
	return null;
}
 
开发者ID:adamjshook,项目名称:bloomfilter-course,代码行数:9,代码来源:Trainer.java

示例10: ElementIteratorReadIntoMemory

import org.apache.hadoop.util.bloom.BloomFilter; //导入依赖的package包/类
/**
 * Extracts the vertices from {@code seedsIter}, adds them to a newly created
 * Bloom filter and passes that filter to {@code initialise}.
 *
 * @throws RetrieverException declared by this constructor
 */
ElementIteratorReadIntoMemory() throws RetrieverException {
    vertices = extractVertices(seedsIter);

    // The filter is built from the store's false positive rate, the number of
    // extracted vertices and the store's maximum filter size setting.
    final BloomFilter seedFilter = BloomFilterUtils.getBloomFilter(
            store.getProperties().getFalsePositiveRate(),
            vertices.size(),
            store.getProperties().getMaxBloomFilterToPassToAnIterator());

    addToBloomFilter(vertices, seedFilter);
    initialise(seedFilter);
}
 
开发者ID:gchq,项目名称:Gaffer,代码行数:12,代码来源:AccumuloIDWithinSetRetriever.java

示例11: ElementIteratorReadIntoMemory

import org.apache.hadoop.util.bloom.BloomFilter; //导入依赖的package包/类
/**
 * Extracts the vertices from both seed iterators, adds the vertices of set B
 * to a newly created Bloom filter and passes that filter to {@code initialise}.
 *
 * @throws RetrieverException declared by this constructor
 */
ElementIteratorReadIntoMemory() throws RetrieverException {
    verticesA = extractVertices(seedSetAIter);
    verticesB = extractVertices(seedSetBIter);

    // Only set B goes into the filter; its size is what the filter is built for.
    final BloomFilter setBFilter = BloomFilterUtils.getBloomFilter(
            store.getProperties().getFalsePositiveRate(),
            verticesB.size(),
            store.getProperties().getMaxBloomFilterToPassToAnIterator());

    addToBloomFilter(verticesB, setBFilter);
    initialise(setBFilter);
}
 
开发者ID:gchq,项目名称:Gaffer,代码行数:12,代码来源:AccumuloIDBetweenSetsRetriever.java

示例12: addToBloomFilter

import org.apache.hadoop.util.bloom.BloomFilter; //导入依赖的package包/类
/**
 * Drains {@code vertices}, adding each element to {@code filter} through the
 * single-element overload, and always hands the iterator to
 * {@code CloseableUtil.close} afterwards.
 *
 * @param vertices the elements to add
 * @param filter   the Bloom filter receiving the elements
 * @throws RetrieverException declared by this method
 */
protected void addToBloomFilter(final Iterator<? extends Object> vertices, final BloomFilter filter)
        throws RetrieverException {
    try {
        while (vertices.hasNext()) {
            final Object vertex = vertices.next();
            addToBloomFilter(vertex, filter);
        }
    } finally {
        // Runs whether or not adding an element failed.
        CloseableUtil.close(vertices);
    }
}
 
开发者ID:gchq,项目名称:Gaffer,代码行数:11,代码来源:AccumuloSetRetriever.java

示例13: setFilter

import org.apache.hadoop.util.bloom.BloomFilter; //导入依赖的package包/类
/**
 * Replaces the {@code filter} field with a Bloom filter deserialized from the
 * given bytes. For testing only, do not use directly.
 *
 * @param dba the serialized Bloom filter
 * @throws IOException if the bytes cannot be read as a Bloom filter
 */
public void setFilter(DataByteArray dba) throws IOException {
    final ByteArrayInputStream raw = new ByteArrayInputStream(dba.get());
    final DataInputStream in = new DataInputStream(raw);

    filter = new BloomFilter();
    filter.readFields(in);
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:10,代码来源:Bloom.java

示例14: bloomOr

import org.apache.hadoop.util.bloom.BloomFilter; //导入依赖的package包/类
/**
 * Resets the {@code filter} field to a new Bloom filter and ORs into it every
 * serialized filter found in field 0 of each tuple of the input bag.
 *
 * @param input tuple whose first field is a bag of tuples, each holding a
 *              serialized Bloom filter in its first field
 * @return the result of {@code bloomOut()}
 * @throws IOException if a filter cannot be deserialized; an
 *                     {@code ExecException} is rethrown wrapped in an IOException
 */
protected DataByteArray bloomOr(Tuple input) throws IOException {
    filter = new BloomFilter(vSize, numHash, hType);

    try {
        final Iterator<Tuple> serializedFilters = ((DataBag) input.get(0)).iterator();
        while (serializedFilters.hasNext()) {
            final DataByteArray bytes = (DataByteArray) serializedFilters.next().get(0);
            filter.or(bloomIn(bytes));
        }
    } catch (ExecException ee) {
        throw new IOException(ee);
    }

    return bloomOut();
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:16,代码来源:BuildBloomBase.java

示例15: bloomIn

import org.apache.hadoop.util.bloom.BloomFilter; //导入依赖的package包/类
/**
 * Deserializes a Bloom filter from the bytes held by {@code b}.
 *
 * @param b the serialized Bloom filter
 * @return a new Bloom filter populated from those bytes
 * @throws IOException if the bytes cannot be read as a Bloom filter
 */
protected BloomFilter bloomIn(DataByteArray b) throws IOException {
    final ByteArrayInputStream raw = new ByteArrayInputStream(b.get());
    final BloomFilter decoded = new BloomFilter();
    decoded.readFields(new DataInputStream(raw));
    return decoded;
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:8,代码来源:BuildBloomBase.java


注:本文中的org.apache.hadoop.util.bloom.BloomFilter类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。