当前位置: 首页>>代码示例>>Java>>正文


Java Emitter.emit方法代码示例

本文整理汇总了Java中org.apache.crunch.Emitter.emit方法的典型用法代码示例。如果您正苦于以下问题:Java Emitter.emit方法的具体用法?Java Emitter.emit怎么用?Java Emitter.emit使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.crunch.Emitter的用法示例。


在下文中一共展示了Emitter.emit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: process

import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Emits one (reference name, interval index) pair for each fixed-size interval that the
 * aligned read overlaps. A position's interval index is {@code position / intervalSize}.
 *
 * <p>Unmapped reads, and reads without a start position, emit nothing.
 *
 * @param input   wrapper holding the SAM record to process
 * @param emitter receives one pair per overlapped interval, in increasing interval order
 */
@Override
public void process (SAMRecordWritable input, Emitter<Pair<String, Integer >> emitter) {
    SAMRecord record = input.get();
    Integer startPosition = record.getAlignmentStart();

    if (record.getReadUnmappedFlag() || startPosition == null) {
        return;
    }

    // Walk interval indices directly instead of walking positions and jumping ahead:
    // the previous "jump by intervalSize, then i++" stride advanced intervalSize + 1
    // positions at a time, so it could step over an entire interval and never visited
    // the interval containing the last aligned base.
    // The alignment end is treated as inclusive (the last aligned reference base).
    int firstInterval = startPosition / intervalSize;
    int lastInterval = record.getAlignmentEnd() / intervalSize;
    for (int interval = firstInterval; interval <= lastInterval; interval++) {
        emitter.emit(
                // emit contig, interval
                new Pair<String, Integer>(
                        record.getReferenceName(),
                        interval)
        );
    }
}
 
开发者ID:arahuja,项目名称:varcrunch,代码行数:27,代码来源:ComputeDepthInInterval.java

示例2: process

import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Parses one log line with {@code pattern} and emits the resulting record.
 *
 * <p>Lines that do not match the pattern are reported on standard error and dropped.
 *
 * @param line    raw log line
 * @param emitter receives the built record for each matching line
 */
@Override
public void process(String line, Emitter<GenericData.Record> emitter) {
  Matcher fields = pattern.matcher(line);
  if (!fields.matches()) {
    System.err.println("No match: " + line);
    return;
  }

  // Capture groups 1..9 map, in order, onto the record fields below.
  recBuilder.set("host", asString(fields.group(1)));
  recBuilder.set("rfc931_identity", asString(fields.group(2)));
  recBuilder.set("username", asString(fields.group(3)));
  recBuilder.set("datetime", asString(fields.group(4)));
  recBuilder.set("request", asString(fields.group(5)));
  recBuilder.set("http_status_code", asInt(fields.group(6)));
  recBuilder.set("response_size", asInt(fields.group(7)));
  recBuilder.set("referrer", asString(fields.group(8)));
  recBuilder.set("user_agent", asString(fields.group(9)));

  emitter.emit(recBuilder.build());
}
 
开发者ID:cloudera,项目名称:cdk,代码行数:21,代码来源:CombinedLogFormatConverter.java

示例3: process

import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Keeps a bounded, weight-ordered reservoir of the values grouped under one key and emits
 * whatever remains in it, in ascending weight order.
 *
 * <p>While the reservoir holds fewer than {@code sampleSize} entries every value is added.
 * After that, a value is admitted only if its weight exceeds the smallest weight currently
 * held, which is evicted first. Because the reservoir is a map keyed by weight, values with
 * an equal weight replace one another.
 *
 * @param input   the key together with its (weight, value) pairs
 * @param emitter receives (key, (weight, value)) for each retained entry
 */
@Override
public void process(Pair<K, Iterable<Pair<Double, T>>> input,
    Emitter<Pair<K, Pair<Double, T>>> emitter) {
  SortedMap<Double, T> kept = Maps.newTreeMap();

  for (Pair<Double, T> candidate : input.second()) {
    boolean hasRoom = kept.size() < sampleSize;
    if (!hasRoom) {
      if (!(candidate.first() > kept.firstKey())) {
        // Not heavier than the lightest retained entry: ignore it.
        continue;
      }
      kept.remove(kept.firstKey());
    }
    kept.put(candidate.first(), ptype.getDetachedValue(candidate.second()));
  }

  for (Map.Entry<Double, T> entry : kept.entrySet()) {
    Pair<Double, T> weighted = Pair.of(entry.getKey(), entry.getValue());
    emitter.emit(Pair.of(input.first(), weighted));
  }
}
 
开发者ID:apsaltis,项目名称:oryx,代码行数:17,代码来源:ReservoirSampling.java

示例4: process

import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Emits (numeric ID, original ID) for the user and item columns of a delimited line,
 * but only for IDs whose original text is not simply the decimal form of the numeric ID.
 *
 * @param line    delimited input line; column 0 is the user ID, column 1 the item ID
 * @param emitter receives zero, one or two (numeric ID, original ID) pairs
 */
@Override
public void process(String line, Emitter<Pair<Long, String>> emitter) {
  String[] fields = DelimitedDataUtils.decode(line);
  String user = fields[0];
  String item = fields[1];

  // Both conversions happen before anything is emitted.
  long userAsLong = StringLongMapping.toLong(user);
  long itemAsLong = StringLongMapping.toLong(item);

  boolean userWasMapped = !Long.toString(userAsLong).equals(user);
  if (userWasMapped) {
    emitter.emit(Pair.of(userAsLong, user));
  }

  boolean itemWasMapped = !Long.toString(itemAsLong).equals(item);
  if (itemWasMapped) {
    emitter.emit(Pair.of(itemAsLong, item));
  }
}
 
开发者ID:apsaltis,项目名称:oryx,代码行数:17,代码来源:MappingParseFn.java

示例5: process

import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Parses a delimited line into (user ID, (item ID, preference value)) and emits it.
 *
 * <p>The preference value is {@code 1.0f} when the line has no third column,
 * {@code NaN} when the third column is empty, and the parsed float otherwise.
 *
 * @param line    delimited input line: user, item and an optional value
 * @param emitter receives exactly one pair per line
 */
@Override
public void process(String line, Emitter<Pair<Long, NumericIDValue>> emitter) {
  String[] fields = DelimitedDataUtils.decode(line);

  long user = StringLongMapping.toLong(fields[0]);
  long item = StringLongMapping.toLong(fields[1]);

  float strength = 1.0f;
  if (fields.length > 2) {
    String token = fields[2];
    if (token.isEmpty()) {
      strength = Float.NaN;
    } else {
      strength = LangUtils.parseFloat(token);
    }
  }

  NumericIDValue itemValue = new NumericIDValue(item, strength);
  emitter.emit(Pair.of(user, itemValue));
}
 
开发者ID:apsaltis,项目名称:oryx,代码行数:18,代码来源:DelimitedInputParseFn.java

示例6: process

import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Unwraps a grouped input: for every (Long, payload) element in the group, emits just the
 * payload. The group key and the Long component are discarded.
 *
 * @param input   the group key with its iterable of (Long, payload) pairs
 * @param emitter receives each payload in iteration order
 */
@Override
public void process(
    Pair<Tuple3<String, Long, String>,
        Iterable<Pair<Long,
            Pair<Tuple3<String, Long, String>, SpecificRecord>>>> input,
    Emitter<Pair<Tuple3<String, Long, String>, SpecificRecord>> emitter) {
  Iterable<Pair<Long, Pair<Tuple3<String, Long, String>, SpecificRecord>>> grouped =
      input.second();
  for (Pair<Long, Pair<Tuple3<String, Long, String>, SpecificRecord>> element : grouped) {
    Pair<Tuple3<String, Long, String>, SpecificRecord> payload = element.second();
    emitter.emit(payload);
  }
}
 
开发者ID:cloudera,项目名称:quince,代码行数:12,代码来源:VariantsLoader.java

示例7: process

import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Converts the wrapped variant context with {@code vcc} and emits one
 * (variant, genotypes) pair for each converted context.
 *
 * @param input   wrapper holding the variant context to convert
 * @param emitter receives one pair per converted context
 */
@Override
public void process(
    VariantContextWritable input, Emitter<Pair<Variant, Collection<Genotype>>> emitter) {
  // The converter returns a Scala sequence; view it as a Java list to iterate.
  for (org.bdgenomics.adam.models.VariantContext converted
      : JavaConversions.seqAsJavaList(vcc.convert(input.get()))) {
    Variant variant = converted.variant().variant();
    Collection<Genotype> genotypes =
        JavaConversions.asJavaCollection(converted.genotypes());
    emitter.emit(Pair.of(variant, genotypes));
  }
}
 
开发者ID:cloudera,项目名称:quince,代码行数:13,代码来源:VCFToADAMVariantFn.java

示例8: process

import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Emits the read once for each task that its sampled positions map to, as
 * (task id, (alignment start, read)).
 *
 * <p>A position's task is looked up in {@code positionToTaskMapping}; positions without an
 * entry fall back to {@code hash(reference name) + position / intervalSize}. The read is
 * emitted only when the task differs from the task of the previously sampled position.
 * Unmapped reads, and reads without a start position, emit nothing.
 *
 * @param input   wrapper holding the SAM record to process
 * @param emitter receives one (task id, (alignment start, read)) pair per task change
 */
@Override
public void process (SAMRecordWritable input, Emitter <Pair<Long, Pair<Integer, SAMRecordWritable>>> emitter) {
    SAMRecord record = input.get();
    Integer startPosition = record.getAlignmentStart();

    if (record.getReadUnmappedFlag() || startPosition == null) {
        return;
    }

    Long lastTask = null;
    int endExclusive = startPosition + record.getReadBases().length;
    int i = startPosition;
    while (i < endExclusive) {
        Long nextTask = positionToTaskMapping.get(new Pair<String, Integer>(record.getReferenceName(), i));

        // If we haven't mapped this position to a task, do so evenly
        if (nextTask == null) {
            nextTask = record.getReferenceName().hashCode() + (long) (i / intervalSize);
        }

        // Compare boxed task ids by value; `!=` on Long compares references and would
        // report equal ids as different, emitting the read repeatedly for the same task.
        if (!nextTask.equals(lastTask)) {
            lastTask = nextTask;
            emitter.emit(
                    // emit task id, alignment start and record
                    new Pair<Long, Pair<Integer, SAMRecordWritable>>(
                            lastTask,
                            new Pair<Integer, SAMRecordWritable>(startPosition, input)
            ));
        }

        // Skip length of interval or to the last base, but always move forward: when the
        // read has more bases than its reference span, clamping to the alignment end would
        // otherwise pull `i` back on every iteration and the loop would never finish.
        // NOTE(review): the stride is intervalSize + 1 positions, so an interval can be
        // stepped over entirely — confirm whether that is intended.
        int jumpTarget = Math.min(record.getAlignmentEnd(), i + intervalSize) + 1;
        i = Math.max(jumpTarget, i + 1);
    }
}
 
开发者ID:arahuja,项目名称:varcrunch,代码行数:32,代码来源:CollectNearbyReadsDoFn.java

示例9: cleanup

import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Flushes the accumulated statistics: emits (key, (count, value)) for every entry in
 * {@code stats}, then empties {@code stats}.
 *
 * @param emitter receives one pair per entry in {@code stats}
 */
@Override
public void cleanup(Emitter<Pair<Integer, Pair<Long, InternalStats>>> emitter) {
  for (Map.Entry<Integer, InternalStats> entry : stats.entrySet()) {
    Integer key = entry.getKey();
    InternalStats value = entry.getValue();
    emitter.emit(Pair.of(key, Pair.of(count, value)));
  }
  stats.clear();
}
 
开发者ID:apsaltis,项目名称:oryx,代码行数:8,代码来源:Summarizer.java

示例10: process

import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * For every index {@code i} below {@code centers.size()}, asks {@code centers} for the
 * distance of {@code vec} at that index and emits
 * ((i, closest center id), (vec, 1)).
 *
 * @param vec    the vector to assign
 * @param emitFn receives one keyed (vector, 1L) pair per index
 */
@Override
public void process(V vec, Emitter<Pair<Pair<Integer, Integer>, Pair<V, Long>>> emitFn) {
  // The same (vector, 1) payload instance is reused for every emitted key.
  Pair<V, Long> payload = Pair.of(vec, 1L);
  for (int idx = 0; idx < centers.size(); idx++) {
    Distance dist = centers.getDistance(vec, idx, approx);
    Pair<Integer, Integer> key = Pair.of(idx, dist.getClosestCenterId());
    emitFn.emit(Pair.of(key, payload));
  }
}
 
开发者ID:apsaltis,项目名称:oryx,代码行数:9,代码来源:KMeansParallel.java

示例11: process

import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Selects the top {@code numRecs} values for one user and emits each as an encoded,
 * delimited line of (user ID, item ID, value), with numeric IDs translated back to strings
 * through the ID mapping.
 *
 * @param input   numeric user ID with that user's candidate values
 * @param emitter receives one encoded line per selected value
 */
@Override
public void process(Pair<Long, Iterable<NumericIDValue>> input, Emitter<String> emitter) {
  StringLongMapping mapping = idMapping.getIDMapping();
  Iterable<NumericIDValue> best = TopN.selectTopN(input.second().iterator(), numRecs);
  String user = mapping.toString(input.first());

  for (NumericIDValue candidate : best) {
    String item = mapping.toString(candidate.getID());
    String score = Float.toString(candidate.getValue());
    emitter.emit(DelimitedDataUtils.encode(user, item, score));
  }
}
 
开发者ID:apsaltis,项目名称:oryx,代码行数:12,代码来源:CollectRecommendFn.java

示例12: process

import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Emits sampled (user ID, item ID, dot product) lines.
 *
 * <p>Only users and items whose ID is a multiple of {@code convergenceSamplingModulus} are
 * considered. For each such user/item combination, the dot product of the user's vector
 * and the item's vector from {@code yState} is emitted.
 *
 * @param input   user ID with that user's vector
 * @param emitter receives one encoded line per sampled user/item combination
 */
@Override
public void process(Pair<Long, float[]> input, Emitter<String> emitter) {
  if (input.first() % convergenceSamplingModulus != 0) {
    return;
  }

  String userID = input.first().toString();
  float[] userVector = input.second();
  for (LongObjectMap.MapEntry<float[]> itemEntry : yState.getY().entrySet()) {
    long itemID = itemEntry.getKey();
    if (itemID % convergenceSamplingModulus != 0) {
      continue;
    }
    float estimate = (float) SimpleVectorMath.dot(userVector, itemEntry.getValue());
    emitter.emit(DelimitedDataUtils.encode(userID, itemID, estimate));
  }
}
 
开发者ID:apsaltis,项目名称:oryx,代码行数:15,代码来源:ConvergenceSampleFn.java

示例13: process

import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Looks up the distance for the input's value at the input's index and, when the squared
 * distance is strictly positive, emits (index, (value, squared distance)).
 *
 * @param in      (index, value) pair
 * @param emitter receives at most one pair per input
 */
@Override
public void process(Pair<Integer, V> in, Emitter<Pair<Integer, Pair<V, Double>>> emitter) {
  Distance dist = index.getDistance(in.second(), in.first(), true);
  if (!(dist.getSquaredDistance() > 0.0)) {
    // Zero (or non-positive / NaN) distance: nothing to emit.
    return;
  }
  emitter.emit(Pair.of(in.first(), Pair.of(in.second(), dist.getSquaredDistance())));
}
 
开发者ID:apsaltis,项目名称:oryx,代码行数:8,代码来源:DistanceToClosestFn.java

示例14: process

import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Reads an existing mapping line and emits it as (numeric ID, string ID).
 *
 * @param line    delimited line; column 0 is the numeric ID in decimal, column 1 the
 *                string ID
 * @param emitter receives exactly one pair per line
 */
@Override
public void process(String line, Emitter<Pair<Long, String>> emitter) {
  String[] fields = DelimitedDataUtils.decode(line);
  // Arguments are evaluated left to right: the numeric column is parsed first.
  emitter.emit(Pair.of(Long.parseLong(fields[0]), fields[1]));
}
 
开发者ID:apsaltis,项目名称:oryx,代码行数:8,代码来源:ExistingMappingsMapFn.java

示例15: process

import org.apache.crunch.Emitter; //导入方法依赖的package包/类
/**
 * Sends the input to {@code reducersPerDatum} distinct reducers chosen at random out of
 * {@code numReducers}, emitting one (reducer, input) pair for each.
 *
 * @param input   the datum to distribute
 * @param emitter receives one pair per chosen reducer
 */
@Override
public void process(String input, Emitter<Pair<Integer,String>> emitter) {
  // The approach follows:
  // http://blog.cloudera.com/blog/2013/02/how-to-resample-from-a-large-data-set-in-parallel-with-r-on-hadoop/
  // Here KM > N; let K = S*(M/N). N is unknown, S = reducersPerDatum and M = numReducers.
  // Every data point goes to S reducers picked uniformly at random, so the expected number
  // of points at a reducer follows a binomial distribution with mean K. For large N that is
  // virtually the Poisson distribution with mean K used in the linked article.
  for (int target : random.nextPermutation(numReducers, reducersPerDatum)) {
    Pair<Integer, String> routed = Pair.of(target, input);
    emitter.emit(routed);
  }
}
 
开发者ID:apsaltis,项目名称:oryx,代码行数:13,代码来源:DistributeExampleFn.java


注:本文中的org.apache.crunch.Emitter.emit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。