本文整理汇总了Java中org.apache.crunch.Emitter.emit方法的典型用法代码示例。如果您正苦于以下问题：Java Emitter.emit方法的具体用法？Java Emitter.emit怎么用？Java Emitter.emit使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.crunch.Emitter的用法示例。
在下文中一共展示了Emitter.emit方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: process
import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Emits one (reference name, interval index) pair for every interval that a
 * mapped read overlaps.
 *
 * <p>The interval index of a position is {@code position / intervalSize}.
 * Every index from the one containing the alignment start to the one
 * containing the alignment end is emitted exactly once, in ascending order.
 * Unmapped reads, and reads whose alignment end lies before their start,
 * produce no output.
 *
 * <p>The previous position-stepping loop advanced by {@code intervalSize + 1}
 * after each emit and clamped to the alignment end before incrementing, so it
 * could skip an interval entirely (e.g. start 5, end 12, interval size 10
 * never emitted interval 1) and it never considered the end position itself.
 *
 * @param input   wrapper around the alignment record to bin
 * @param emitter receives one pair per overlapped interval
 */
@Override
public void process(SAMRecordWritable input, Emitter<Pair<String, Integer>> emitter) {
  SAMRecord record = input.get();
  Integer startPosition = record.getAlignmentStart();
  if (record.getReadUnmappedFlag() || startPosition == null) {
    return;
  }

  int start = startPosition;
  // The alignment end is treated as an inclusive coordinate (htsjdk convention).
  int end = record.getAlignmentEnd();
  if (end < start) {
    // Nothing on the reference is covered by this read.
    return;
  }

  String contig = record.getReferenceName();
  int firstInterval = start / intervalSize;
  int lastInterval = end / intervalSize;
  for (int interval = firstInterval; interval <= lastInterval; interval++) {
    emitter.emit(new Pair<String, Integer>(contig, interval));
  }
}
示例2: process
import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Parses one log line with {@code pattern} and emits the record built from
 * its nine capture groups.
 *
 * <p>Lines that do not match the pattern are reported on standard error and
 * produce no output.
 *
 * @param line    raw input line
 * @param emitter receives the built record for each matching line
 */
@Override
public void process(String line, Emitter<GenericData.Record> emitter) {
  final Matcher fields = pattern.matcher(line);
  if (!fields.matches()) {
    System.err.println("No match: " + line);
    return;
  }

  // Copy each capture group into its named record field.
  recBuilder.set("host", asString(fields.group(1)));
  recBuilder.set("rfc931_identity", asString(fields.group(2)));
  recBuilder.set("username", asString(fields.group(3)));
  recBuilder.set("datetime", asString(fields.group(4)));
  recBuilder.set("request", asString(fields.group(5)));
  recBuilder.set("http_status_code", asInt(fields.group(6)));
  recBuilder.set("response_size", asInt(fields.group(7)));
  recBuilder.set("referrer", asString(fields.group(8)));
  recBuilder.set("user_agent", asString(fields.group(9)));

  emitter.emit(recBuilder.build());
}
示例3: process
import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Keeps at most {@code sampleSize} of the weighted values grouped under one
 * key, preferring the largest weights, and emits the survivors in ascending
 * weight order.
 *
 * <p>Values are stored in a sorted map keyed by weight, so two values with
 * the same weight occupy a single slot (the later one replaces the earlier).
 *
 * @param input   the key together with its (weight, value) candidates
 * @param emitter receives (key, (weight, value)) for each retained candidate
 */
@Override
public void process(Pair<K, Iterable<Pair<Double, T>>> input,
    Emitter<Pair<K, Pair<Double, T>>> emitter) {
  final SortedMap<Double, T> kept = Maps.newTreeMap();

  for (Pair<Double, T> candidate : input.second()) {
    boolean admit = kept.size() < sampleSize;
    if (!admit && candidate.first() > kept.firstKey()) {
      // Full: evict the current smallest weight to make room for a larger one.
      kept.remove(kept.firstKey());
      admit = true;
    }
    if (admit) {
      kept.put(candidate.first(), ptype.getDetachedValue(candidate.second()));
    }
  }

  for (Map.Entry<Double, T> survivor : kept.entrySet()) {
    final Pair<Double, T> weighted = Pair.of(survivor.getKey(), survivor.getValue());
    emitter.emit(Pair.of(input.first(), weighted));
  }
}
示例4: process
import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Emits a (numeric ID, original ID) pair for the user and item columns of a
 * delimited line, but only for IDs whose original text differs from the
 * decimal rendering of their numeric form.
 *
 * @param line    delimited input line; column 0 is the user ID and column 1
 *                the item ID
 * @param emitter receives the numeric-to-original ID pairs
 */
@Override
public void process(String line, Emitter<Pair<Long, String>> emitter) {
  final String[] fields = DelimitedDataUtils.decode(line);
  final String user = fields[0];
  final String item = fields[1];

  final long userAsLong = StringLongMapping.toLong(user);
  final long itemAsLong = StringLongMapping.toLong(item);

  // An ID round-trips when its text is already the decimal form of the number.
  final boolean userRoundTrips = Long.toString(userAsLong).equals(user);
  if (!userRoundTrips) {
    emitter.emit(Pair.of(userAsLong, user));
  }

  final boolean itemRoundTrips = Long.toString(itemAsLong).equals(item);
  if (!itemRoundTrips) {
    emitter.emit(Pair.of(itemAsLong, item));
  }
}
示例5: process
import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Turns a delimited (user, item[, value]) line into a pair keyed by the
 * numeric user ID.
 *
 * <p>The preference value is {@code 1.0f} when the line has no third column,
 * {@code NaN} when the third column is empty, and the parsed float otherwise.
 *
 * @param line    delimited input line
 * @param emitter receives (numeric user ID, (numeric item ID, value))
 */
@Override
public void process(String line, Emitter<Pair<Long, NumericIDValue>> emitter) {
  final String[] fields = DelimitedDataUtils.decode(line);
  final long user = StringLongMapping.toLong(fields[0]);
  final long item = StringLongMapping.toLong(fields[1]);

  float strength = 1.0f;
  if (fields.length > 2) {
    final String token = fields[2];
    if (token.isEmpty()) {
      strength = Float.NaN;
    } else {
      strength = LangUtils.parseFloat(token);
    }
  }

  final NumericIDValue itemValue = new NumericIDValue(item, strength);
  emitter.emit(Pair.of(user, itemValue));
}
示例6: process
import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Drops the grouping key and the per-element {@code Long} from a grouped
 * input and re-emits each element's inner pair, in iteration order.
 *
 * @param input   grouping key plus the elements grouped under it
 * @param emitter receives the inner pair of every grouped element
 */
@Override
public void process(
    Pair<Tuple3<String, Long, String>,
        Iterable<Pair<Long, Pair<Tuple3<String, Long, String>, SpecificRecord>>>> input,
    Emitter<Pair<Tuple3<String, Long, String>, SpecificRecord>> emitter) {
  final Iterable<Pair<Long, Pair<Tuple3<String, Long, String>, SpecificRecord>>> grouped =
      input.second();
  for (Pair<Long, Pair<Tuple3<String, Long, String>, SpecificRecord>> element : grouped) {
    final Pair<Tuple3<String, Long, String>, SpecificRecord> payload = element.second();
    emitter.emit(payload);
  }
}
示例7: process
import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Converts one wrapped variant context through {@code vcc} and emits a
 * (variant, genotypes) pair for each converted context.
 *
 * @param input   wrapper around the variant context to convert
 * @param emitter receives one pair per converted context
 */
@Override
public void process(
    VariantContextWritable input, Emitter<Pair<Variant, Collection<Genotype>>> emitter) {
  final VariantContext source = input.get();
  final List<org.bdgenomics.adam.models.VariantContext> converted =
      JavaConversions.seqAsJavaList(vcc.convert(source));

  for (org.bdgenomics.adam.models.VariantContext context : converted) {
    final Variant variant = context.variant().variant();
    // Expose the Scala genotype collection as a java.util.Collection.
    final Collection<Genotype> genotypes =
        JavaConversions.asJavaCollection(context.genotypes());
    final Pair<Variant, Collection<Genotype>> result = Pair.of(variant, genotypes);
    emitter.emit(result);
  }
}
示例8: process
import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Assigns a mapped read to one or more task IDs and emits
 * (task ID, (alignment start, read)) each time the task ID changes while
 * walking the positions of the read.
 *
 * <p>A position's task comes from {@code positionToTaskMapping}; positions
 * without an entry fall back to
 * {@code referenceName.hashCode() + position / intervalSize}. Unmapped reads
 * produce no output.
 *
 * <p>Fixes relative to the earlier version:
 * <ul>
 *   <li>Task IDs are compared with {@code equals}. The old {@code !=}
 *       compared boxed {@code Long} references, so equal IDs outside the
 *       small-value cache looked different and the read was emitted again.</li>
 *   <li>The inner pair is created with explicit type arguments instead of a
 *       raw type.</li>
 *   <li>The skip step can no longer move the cursor backwards. Previously,
 *       once the cursor passed the alignment end while still below
 *       {@code start + readBases.length}, it was reset to the alignment end
 *       on every iteration and the loop never terminated.</li>
 * </ul>
 *
 * @param input   wrapper around the alignment record to route
 * @param emitter receives one pair per distinct consecutive task ID
 */
@Override
public void process(SAMRecordWritable input,
    Emitter<Pair<Long, Pair<Integer, SAMRecordWritable>>> emitter) {
  SAMRecord record = input.get();
  Integer startPosition = record.getAlignmentStart();
  if (record.getReadUnmappedFlag() || startPosition == null) {
    return;
  }

  Long lastTask = null;
  for (int i = startPosition; i < startPosition + record.getReadBases().length; ++i) {
    Long nextTask =
        positionToTaskMapping.get(new Pair<String, Integer>(record.getReferenceName(), i));
    // If we haven't mapped this position to a task, do so evenly.
    if (nextTask == null) {
      nextTask = record.getReferenceName().hashCode() + (long) (i / intervalSize);
    }
    // nextTask is non-null here; equals() compares values, not references.
    if (!nextTask.equals(lastTask)) {
      lastTask = nextTask;
      emitter.emit(
          new Pair<Long, Pair<Integer, SAMRecordWritable>>(
              lastTask,
              new Pair<Integer, SAMRecordWritable>(startPosition, input)));
    }
    // Skip the length of an interval or to the last aligned base, but never
    // step backwards: Math.max keeps the cursor monotonic so the loop ends.
    // NOTE(review): together with ++i this advances intervalSize + 1
    // positions per step, as before — confirm that stride is intended.
    i = Math.max(i, Math.min(record.getAlignmentEnd(), i + intervalSize));
  }
}
示例9: cleanup
import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Flushes the accumulated statistics: emits (key, (count, stats)) for every
 * entry in {@code stats} and then empties the map.
 *
 * @param emitter receives one pair per accumulated entry
 */
@Override
public void cleanup(Emitter<Pair<Integer, Pair<Long, InternalStats>>> emitter) {
  for (Map.Entry<Integer, InternalStats> entry : stats.entrySet()) {
    final Integer key = entry.getKey();
    final InternalStats accumulated = entry.getValue();
    emitter.emit(Pair.of(key, Pair.of(count, accumulated)));
  }
  // Everything has been handed downstream; drop the accumulated state.
  stats.clear();
}
示例10: process
import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * For every index below {@code centers.size()}, computes the distance result
 * for the vector at that index and emits
 * ((index, closest center ID), (vector, 1)).
 *
 * @param vec    the vector to assign
 * @param emitFn receives one pair per index
 */
@Override
public void process(V vec, Emitter<Pair<Pair<Integer, Integer>, Pair<V, Long>>> emitFn) {
  // Every emitted value is the same (vector, 1) pair, so build it once.
  final Pair<V, Long> weighted = Pair.of(vec, 1L);

  int idx = 0;
  while (idx < centers.size()) {
    final Distance nearest = centers.getDistance(vec, idx, approx);
    final Pair<Integer, Integer> key = Pair.of(idx, nearest.getClosestCenterId());
    emitFn.emit(Pair.of(key, weighted));
    idx++;
  }
}
示例11: process
import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Selects up to {@code numRecs} top values for one user via
 * {@code TopN.selectTopN} and emits each as an encoded
 * (user, item, value) line, with numeric IDs translated back through the ID
 * mapping.
 *
 * @param input   numeric user ID and the values grouped under it
 * @param emitter receives one encoded line per selected value
 */
@Override
public void process(Pair<Long, Iterable<NumericIDValue>> input, Emitter<String> emitter) {
  final StringLongMapping idLookup = idMapping.getIDMapping();
  final Iterable<NumericIDValue> best = TopN.selectTopN(input.second().iterator(), numRecs);
  final String user = idLookup.toString(input.first());

  for (NumericIDValue candidate : best) {
    final String item = idLookup.toString(candidate.getID());
    final String score = Float.toString(candidate.getValue());
    emitter.emit(DelimitedDataUtils.encode(user, item, score));
  }
}
示例12: process
import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Emits encoded (user, item, estimate) lines for a sample of user/item
 * combinations: only users and items whose ID is divisible by
 * {@code convergenceSamplingModulus} take part. The estimate is the dot
 * product of the user's vector with the item's vector from {@code yState},
 * narrowed to {@code float}.
 *
 * @param input   user ID and that user's vector
 * @param emitter receives one encoded line per sampled user/item combination
 */
@Override
public void process(Pair<Long, float[]> input, Emitter<String> emitter) {
  if (input.first() % convergenceSamplingModulus != 0) {
    // This user is not part of the sample.
    return;
  }

  final String user = input.first().toString();
  final float[] userVector = input.second();

  for (LongObjectMap.MapEntry<float[]> itemEntry : yState.getY().entrySet()) {
    final long itemID = itemEntry.getKey();
    if (itemID % convergenceSamplingModulus != 0) {
      continue;
    }
    final float estimate = (float) SimpleVectorMath.dot(userVector, itemEntry.getValue());
    emitter.emit(DelimitedDataUtils.encode(user, itemID, estimate));
  }
}
示例13: process
import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Looks up the distance result for a value against the entry identified by
 * its integer key and, when the squared distance is strictly positive, emits
 * (key, (value, squared distance)).
 *
 * @param in      integer key paired with the value to measure
 * @param emitter receives the pair when the squared distance is positive
 */
@Override
public void process(Pair<Integer, V> in, Emitter<Pair<Integer, Pair<V, Double>>> emitter) {
  final Distance measured = index.getDistance(in.second(), in.first(), true);
  // Written as a negated '>' so a NaN distance is skipped, exactly as a
  // plain 'if (x > 0.0)' would skip it.
  if (!(measured.getSquaredDistance() > 0.0)) {
    return;
  }
  final Pair<V, Double> valueWithDistance = Pair.of(in.second(), measured.getSquaredDistance());
  emitter.emit(Pair.of(in.first(), valueWithDistance));
}
示例14: process
import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Reads a delimited (numeric ID, original ID) line and emits it as a pair.
 *
 * @param line    delimited input line; column 0 must parse as a {@code long}
 * @param emitter receives the (numeric ID, original ID) pair
 */
@Override
public void process(String line, Emitter<Pair<Long, String>> emitter) {
  final String[] fields = DelimitedDataUtils.decode(line);
  final long key = Long.parseLong(fields[0]);
  final String original = fields[1];
  final Pair<Long, String> mapping = Pair.of(key, original);
  emitter.emit(mapping);
}
示例15: process
import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Fans one input datum out to the reducers chosen by
 * {@code random.nextPermutation(numReducers, reducersPerDatum)}, emitting a
 * (reducer, datum) pair for each.
 *
 * <p>Background, following
 * http://blog.cloudera.com/blog/2013/02/how-to-resample-from-a-large-data-set-in-parallel-with-r-on-hadoop/ :
 * here KM > N; let K = S*(M/N). N is unknown, S = reducersPerDatum and
 * M = numReducers. Each data point can be sent to S reducers chosen uniformly
 * at random, so the expected number of data points at each reducer follows a
 * binomial distribution with mean K. For large N this is virtually the same
 * as the Poisson distribution with mean K used in the linked article.
 *
 * @param input   the datum to replicate
 * @param emitter receives one (reducer, datum) pair per chosen reducer
 */
@Override
public void process(String input, Emitter<Pair<Integer,String>> emitter) {
  for (int target : random.nextPermutation(numReducers, reducersPerDatum)) {
    final Pair<Integer, String> routed = Pair.of(target, input);
    emitter.emit(routed);
  }
}