本文整理汇总了Java中com.google.common.hash.BloomFilter类的典型用法代码示例。如果您正苦于以下问题：Java BloomFilter类的具体用法？Java BloomFilter怎么用？Java BloomFilter使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
BloomFilter类属于com.google.common.hash包,在下文中一共展示了BloomFilter类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: readBloomFilterFromfile
import com.google.common.hash.BloomFilter; //导入依赖的package包/类
/**
 * Loads a serialized Bloom filter of string ids from a file.
 *
 * <p>The input stream is closed on every path. Failures keep the existing
 * contract: the stack trace is printed and {@code null} is returned.
 *
 * @param bloomFilterFilePath path of the file holding the serialized filter
 * @return the deserialized filter, or {@code null} if the file could not be opened or parsed
 * @throws IOException declared for interface compatibility; failures inside the
 *         read are caught here and reported as a {@code null} result
 */
static BloomFilter<String> readBloomFilterFromfile(String bloomFilterFilePath) throws IOException {
    // The funnel must feed strings as UTF-8, exactly as before, so previously
    // written filter files keep matching.
    Funnel<String> memberFunnel = new Funnel<String>() {
        @Override
        public void funnel(String memberId, PrimitiveSink sink) {
            sink.putString(memberId, Charsets.UTF_8);
        }
    };
    // try-with-resources: the original never closed the stream and leaked a
    // file descriptor on every call.
    try (FileInputStream fis = new FileInputStream(new File(bloomFilterFilePath))) {
        return BloomFilter.readFrom(fis, memberFunnel);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return null;
}
示例2: initBloomfilters
import com.google.common.hash.BloomFilter; //导入依赖的package包/类
/**
 * Creates the list of Bloom filters ("buckets") backing this instance.
 *
 * <p>One filter is created unless {@code expectedNumberOfElements} exceeds
 * {@code Integer.MAX_VALUE}; in that case the elements are split across several
 * equally sized filters.
 *
 * @param falsePositiveProbability false-positive probability passed to every filter
 * @param expectedNumberOfElements total number of elements expected across all filters
 * @throws IllegalArgumentException if the computed number of filters is smaller than one
 */
private void initBloomfilters(double falsePositiveProbability, long expectedNumberOfElements) {
    // NOTE(review): presumably the largest element count a single filter can
    // hold at this false-positive probability - confirm against BloomFilterUtils.
    double singleBfMaxAddElements = BloomFilterUtils.caculateNumberAddElements(Integer.MAX_VALUE,
            falsePositiveProbability);
    long singleBfElements = expectedNumberOfElements;
    int bflen = 1;
    // Larger than the int range: spread the elements over several buckets.
    if (expectedNumberOfElements > Integer.MAX_VALUE) {
        // Number of buckets = total / per-filter capacity, rounded up.
        // (The original used '%', which yields a remainder, not a count.)
        bflen = (int) Math.ceil(expectedNumberOfElements / singleBfMaxAddElements);
        // Validate before dividing by bflen. The original condition was
        // inverted (bflen < 1) and rejected every valid configuration.
        checkArgument(bflen >= 1, "the length of bloomfilters cannot be smaller than one.");
        // Stay in long arithmetic: Math.round(long) resolves to Math.round(float),
        // which returns an int clamped at Integer.MAX_VALUE and overflows on "+ 1".
        singleBfElements = expectedNumberOfElements / bflen + 1;
    }
    this.bloomfilters = new ArrayList<>(bflen);
    for (int i = 0; i < bflen; i++) {
        BloomFilter<CharSequence> bf = BloomFilter.create(Funnels.stringFunnel(this.charset),
                singleBfElements, falsePositiveProbability);
        this.bloomfilters.add(bf);
    }
}
示例3: add
import com.google.common.hash.BloomFilter; //导入依赖的package包/类
/**
 * Adds an element to the Bloom filter bucket selected by the element's hash.
 *
 * <p>With a single bucket the first filter is used directly; otherwise the
 * bucket index is derived from the murmur3_128 hash of the element's bytes in
 * {@code this.charset}.
 *
 * @param element element to add; {@code null} is ignored
 * @return {@code false} if {@code element} is {@code null}, {@code true} otherwise
 */
public boolean add(CharSequence element) {
    if (element == null) {
        return false;
    }
    initCharset();
    BloomFilter<CharSequence> bf = this.bloomfilters.get(0);
    if (getBloomfilterBucketLength() > 1) {
        byte[] datas = element.toString().getBytes(this.charset);
        int hash = Hashing.murmur3_128().hashBytes(datas).asInt();
        // Widen to long before abs(): Math.abs(Integer.MIN_VALUE) is still
        // negative and produced a negative list index. Every other hash value
        // maps to exactly the same bucket as before.
        int bfIndex = (int) (Math.abs((long) hash) % getBloomfilterBucketLength());
        bf = this.bloomfilters.get(bfIndex);
    }
    // The lock is per bucket, as in the original.
    synchronized (bf) {
        bf.put(element);
        numberOfAddedElements++;
        return true;
    }
}
示例4: CollisionHandler
import com.google.common.hash.BloomFilter; //导入依赖的package包/类
/**
 * Builds {@code numFilters} separate Bloom filters over {@code Long} values and
 * stores them in {@code filters}.
 *
 * @param numFilters how many filters to create; no filter is created when not positive
 * @param size value handed to {@code BloomFilter.create} for each filter
 */
public CollisionHandler(int numFilters, int size) {
    filters = new ArrayList<>();
    for (int remaining = numFilters; remaining > 0; remaining--) {
        // Each filter gets its own funnel instance, which simply writes the long value.
        Funnel<Long> longFunnel = new Funnel<Long>() {
            @Override
            public void funnel(Long from, Sink into) {
                into.putLong(from);
            }
        };
        BloomFilter<Long> filter = BloomFilter.create(longFunnel, size);
        filters.add(filter);
    }
}
示例5: create
import com.google.common.hash.BloomFilter; //导入依赖的package包/类
/**
 * Builds a new {@code PremiumListRevision} for the given parent list, with a
 * Bloom filter containing every supplied label.
 *
 * @param parent premium list the revision's parent key is created from
 * @param premiumLabels labels to insert into the revision's Bloom filter
 * @return the populated revision
 * @throws IllegalArgumentException if the serialized filter is larger than
 *         {@code MAX_BLOOM_FILTER_BYTES}
 * @throws IllegalStateException if the filter cannot be serialized
 */
@VisibleForTesting
public static PremiumListRevision create(PremiumList parent, Set<String> premiumLabels) {
    PremiumListRevision result = new PremiumListRevision();
    result.parent = Key.create(parent);
    result.revisionId = allocateId();
    // Labels arrive already punycoded, so they are funnelled without any
    // additional character encoding.
    result.probablePremiumLabels =
            BloomFilter.create(unencodedCharsFunnel(), premiumLabels.size());
    for (String label : premiumLabels) {
        result.probablePremiumLabels.put(label);
    }
    // Serialize once only to measure the size of the filter.
    ByteArrayOutputStream serialized = new ByteArrayOutputStream();
    try {
        result.probablePremiumLabels.writeTo(serialized);
    } catch (IOException e) {
        throw new IllegalStateException("Could not serialize premium labels Bloom filter", e);
    }
    checkArgument(
            serialized.size() <= MAX_BLOOM_FILTER_BYTES,
            "Too many premium labels were specified; Bloom filter exceeds max entity size");
    return result;
}
示例6: getFilter
import com.google.common.hash.BloomFilter; //导入依赖的package包/类
/**
 * Returns the Bloom filter registered for {@code batch}, creating and
 * registering one first when {@code _filters} has no entry for it.
 *
 * <p>The funnel hashes either the string form of all tuple values (when
 * {@code _uniqueFields} is null) or the string form of each configured field,
 * using the platform default charset.
 */
private BloomFilter<MapTuple> getFilter(Object batch) {
    if (_filters.containsKey(batch)) {
        return _filters.get(batch);
    }
    Funnel<MapTuple> tupleFunnel = new Funnel<MapTuple>() {
        private static final long serialVersionUID = 3504134639163725164L;

        @Override
        public void funnel(MapTuple from, PrimitiveSink into) {
            if (_uniqueFields != null) {
                // Only the configured fields take part in the uniqueness check.
                for (String field : _uniqueFields) {
                    into.putString(from.get(field).toString(), Charset.defaultCharset());
                }
                return;
            }
            into.putString(from.values().toString(), Charset.defaultCharset());
        }
    };
    logger().info("Creating unique filter with max expected capacity of: " + _expectedSize);
    BloomFilter<MapTuple> created = BloomFilter.create(tupleFunnel, _expectedSize);
    _filters.put(batch, created);
    return _filters.get(batch);
}
示例7: commit
import com.google.common.hash.BloomFilter; //导入依赖的package包/类
/**
 * Makes all changes made since the previous
 * commit/rollback permanent and releases any database locks
 * currently held by this <code>Connection</code> object.
 * This method should be
 * used only when auto-commit mode has been disabled.
 * <p>
 * The work runs inside a <code>RetryCommand</code> handed to a
 * <code>RetryExecution</code> named "COMMIT" (presumably re-run on failure;
 * confirm against RetryExecution). The command flushes a non-empty triple
 * batch, replaces <code>deletedStatementsLog</code> with a fresh Bloom filter
 * and commits the underlying connection when it is not null. Afterwards a new
 * transaction id is taken from <code>getNextSequence()</code>.
 *
 * @exception java.sql.SQLException if a database access error occurs,
 * this method is called while participating in a distributed transaction,
 * if this method is called on a closed connection or this
 * <code>Connection</code> object is in auto-commit mode
 * @see #setAutoCommit
 */
public synchronized void commit() throws SQLException {
// Counted once per call, before the command is executed.
numberOfCommits++;
RetryExecution execution = new RetryExecution("COMMIT");
execution.execute(connection, new RetryCommand<Void>() {
@Override
public Void run() throws SQLException {
// Write out pending batched triples before committing.
if(tripleBatch != null && tripleBatch.size() > 0) {
flushBatch();
}
// Start the next transaction with an empty deleted-statements filter.
deletedStatementsLog = BloomFilter.create(Funnels.longFunnel(), 100000);
if(connection != null) {
connection.commit();
}
return null;
}
});
this.transactionId = getNextSequence();
}
示例8: rollback
import com.google.common.hash.BloomFilter; //导入依赖的package包/类
/**
 * Undoes all changes made in the current transaction
 * and releases any database locks currently held
 * by this <code>Connection</code> object. This method should be
 * used only when auto-commit mode has been disabled.
 * <p>
 * Pending batched triples are discarded: each gets its id reset to -1 and the
 * batch is cleared while holding the batch's monitor. The
 * <code>deletedStatementsLog</code> is replaced with a fresh Bloom filter, the
 * underlying connection is rolled back when it is non-null and not closed,
 * and a new transaction id is taken from <code>getNextSequence()</code>.
 *
 * @exception java.sql.SQLException if a database access error occurs,
 * this method is called while participating in a distributed transaction,
 * this method is called on a closed connection or this
 * <code>Connection</code> object is in auto-commit mode
 * @see #setAutoCommit
 */
public void rollback() throws SQLException {
// NOTE(review): the null/size check happens outside the synchronized block.
if(tripleBatch != null && tripleBatch.size() > 0) {
synchronized (tripleBatch) {
for(KiWiTriple triple : tripleBatch) {
// -1 presumably marks the triple as not persisted - confirm against KiWiTriple.
triple.setId(-1L);
}
tripleBatch.clear();
}
}
// Start the next transaction with an empty deleted-statements filter.
deletedStatementsLog = BloomFilter.create(Funnels.longFunnel(), 100000);
if(connection != null && !connection.isClosed()) {
connection.rollback();
}
this.transactionId = getNextSequence();
}
示例9: getFilter
import com.google.common.hash.BloomFilter; //导入依赖的package包/类
/**
 * Returns this object's Bloom filter, resolving it on first use.
 *
 * <p>Resolution order: the filter already held in {@code this.filter}; a filter
 * loaded from {@code filterProvider} under {@code filterFileName} (presumably
 * derived from the data hash - confirm against the caller); otherwise a new
 * filter created from the configured size and false-positive probability.
 *
 * @return the cached, loaded or newly created filter
 * @throws java.io.IOException declared for the provider lookup and load
 */
protected BloomFilter getFilter() throws IOException
{
    if (this.filter == null) {
        if (filterProvider.hasFilter(this.filterFileName)) {
            this.filter = (BloomFilter) filterProvider.loadFilter(this.filterFileName);
        } else {
            double fpp = config.getFalsePositiveProbability();
            int capacity = config.getExpectedNumberOfElements();
            this.filter = BloomFilter.create(StringHashFunnel.INSTANCE, capacity, fpp);
        }
    }
    return this.filter;
}
示例10: setup
import com.google.common.hash.BloomFilter; //导入依赖的package包/类
/**
 * Loads one serialized Bloom filter per configured message field into
 * {@code bloomFilterMap}.
 *
 * <p>The comma-separated file paths come from the {@code BLOOMFILTER_FILEPATH}
 * property; the field names are currently fixed to {@code "taxi_identifier"}.
 * Setup stops early, leaving the map unset or partially filled, when the
 * number of paths does not match the number of fields or a filter cannot be
 * loaded. Failures never propagate to the caller, but they are now logged
 * instead of being silently discarded.
 *
 * @param context processor context providing the file path property
 */
public void setup(ProcessContext context) {
    // Fully qualified so that no new import is required in this file.
    java.util.logging.Logger log = java.util.logging.Logger.getLogger(getClass().getName());
    try {
        // Comma-separated names of the message fields that get a Bloom filter.
        String useMsgList = "taxi_identifier";
        bloomFilterFilePaths = context.getProperty(BLOOMFILTER_FILEPATH).evaluateAttributeExpressions().getValue();
        int expectedInsertions = 20000000;
        useMsgFieldList = useMsgList.split(",");
        String[] bloomFilterPathList = bloomFilterFilePaths.split(",");
        // Every field needs exactly one model file.
        if (useMsgFieldList.length != bloomFilterPathList.length) {
            log.warning("Bloom filter setup skipped: " + useMsgFieldList.length + " field(s) but "
                    + bloomFilterPathList.length + " model file path(s)");
            return;
        }
        bloomFilterMap = new HashMap<String, BloomFilter<String>>();
        testingRange = expectedInsertions;
        // Populate one Bloom filter per field from its serialized file.
        for (int i = 0; i < useMsgFieldList.length; i++) {
            bloomFilter = readBloomFilterFromfile(bloomFilterPathList[i]);
            if (bloomFilter == null) {
                log.warning("Bloom filter setup aborted: could not load " + bloomFilterPathList[i]);
                return;
            }
            bloomFilterMap.put(useMsgFieldList[i], bloomFilter);
        }
    } catch (Exception e) {
        // Still best-effort (no rethrow), but no longer invisible.
        log.log(java.util.logging.Level.SEVERE, "Bloom filter setup failed", e);
    }
}
示例11: mightContain
import com.google.common.hash.BloomFilter; //导入依赖的package包/类
/**
 * Reports whether any of the held Bloom filters might contain the task.
 *
 * @param task task to look up
 * @return {@code true} as soon as one filter reports a possible match,
 *         {@code false} if none does
 */
public boolean mightContain(Task task) {
    boolean found = false;
    for (BloomFilter<Task> candidate : bloomFilters) {
        found = candidate.mightContain(task);
        if (found) {
            // Stop at the first filter that reports a possible match.
            break;
        }
    }
    return found;
}
示例12: readFromFile
import com.google.common.hash.BloomFilter; //导入依赖的package包/类
/**
 * Restores the Bloom filter group from the numbered files
 * {@code <type name>$0.tmp}, {@code <type name>$1.tmp}, ... in {@code dir}.
 *
 * <p>Files are read in index order until the next index does not exist. The
 * existing filters are replaced only once the first file has been opened, so
 * a directory without any matching file leaves the current state untouched.
 * {@code activateBloomFilter} ends up pointing at the last filter read.
 *
 * @param dir directory containing the serialized filters
 * @throws IOException if a file cannot be opened or deserialized
 */
public void readFromFile(String dir) throws IOException {
    File file;
    int p = 0;
    while ((file = new File(dir, this.getClass().getTypeName() + "$" + p + ".tmp")).exists()) {
        logger.info("Reading bloom remover data ${} from file {}...", p, file.getPath());
        // try-with-resources: the original never closed the stream.
        try (FileInputStream inputStream = new FileInputStream(file)) {
            // Clear only before the first file. The original cleared on every
            // iteration, which discarded all filters except the last one read.
            if (p == 0) {
                this.bloomFilters.clear();
            }
            bloomFilters.add(activateBloomFilter = BloomFilter.readFrom(inputStream, Task.DIGEST));
        }
        p++;
    }
    logger.info("Bloom remover data [$0-${}] is successfully loaded.", groupSize() - 1);
}
示例13: URIBloomFilter
import com.google.common.hash.BloomFilter; //导入依赖的package包/类
/**
* 初始化一个bloom过滤器到内存中
*
* @param expectedInsertions 预估的最大元素容量
* @param fpp 误报概率
*/
public URIBloomFilter(long expectedInsertions, double fpp) {
urlCounter = new AtomicLong(0);
this.expectedInsertions = expectedInsertions;
this.fpp = fpp;
bloomFilter = BloomFilter.create(
Funnels.stringFunnel(Charset.defaultCharset()), expectedInsertions, fpp);
}
示例14: contains
import com.google.common.hash.BloomFilter; //导入依赖的package包/类
/**
 * Returns true if the element could have been inserted into the Bloom filter.
 * Use getFalsePositiveProbability() to calculate the probability of this
 * being correct.
 *
 * <p>With more than one bucket, the bucket is selected from the murmur3_128
 * hash of the element's bytes in {@code this.charset}.
 *
 * @param element element to check; {@code null} is never contained
 * @return true if the element could have been inserted into the Bloom filter.
 */
public boolean contains(CharSequence element) {
    if (element == null) {
        return false;
    }
    initCharset();
    BloomFilter<CharSequence> bf = this.bloomfilters.get(0);
    if (getBloomfilterBucketLength() > 1) {
        byte[] datas = element.toString().getBytes(this.charset);
        int hash = Hashing.murmur3_128().hashBytes(datas).asInt();
        // Widen to long before abs(): Math.abs(Integer.MIN_VALUE) is still
        // negative and produced a negative list index. Every other hash value
        // maps to exactly the same bucket as before.
        int bfIndex = (int) (Math.abs((long) hash) % getBloomfilterBucketLength());
        bf = this.bloomfilters.get(bfIndex);
    }
    return bf.mightContain(element);
}
示例15: getOrCreate
import com.google.common.hash.BloomFilter; //导入依赖的package包/类
/**
 * Returns the Bloom filter registered for {@code segment}, creating and
 * registering it when none exists yet.
 *
 * <p>The lookup is tried first without a lock; on a miss it is repeated while
 * holding the monitor of the interned segment string before a filter is built.
 * The expected size is read from the crawler configuration and defaults to
 * 1000000.
 */
private BloomFilter<Seed> getOrCreate(String segment) {
    BloomFilter<Seed> cached = bloomFilters.get(segment);
    if (cached == null) {
        synchronized (segment.intern()) {
            // Look again under the lock before building a new filter.
            cached = bloomFilters.get(segment);
            if (cached == null) {
                String configured = VSCrawlerContext.vsCrawlerConfigFileWatcher.loadedProperties()
                        .getProperty(VSCrawlerConstant.VSCRAWLER_SEED_MANAGER_EXPECTED_SEED_NUMBER);
                long expectedNumber = NumberUtils.toLong(configured, 1000000L);
                // Seeds are hashed by their resolved key in the platform default charset.
                Funnel<Seed> seedFunnel = new Funnel<Seed>() {
                    @Override
                    public void funnel(Seed from, PrimitiveSink into) {
                        into.putString(seedKeyResolver.resolveSeedKey(from), Charset.defaultCharset());
                    }
                };
                cached = BloomFilter.create(seedFunnel, expectedNumber);
                bloomFilters.put(segment, cached);
            }
        }
    }
    return cached;
}