

Java Pair Class Code Examples

This article collects typical usage examples of the Java class org.apache.mahout.common.Pair. If you are asking yourself what exactly the Pair class does, how to use it, or what real code that uses it looks like, the curated examples below should help.


The Pair class belongs to the org.apache.mahout.common package. Fifteen code examples of the class are shown below, ordered by popularity by default.
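Before the examples, here is a minimal sketch of how the class behaves, assuming only what the examples below demonstrate: an immutable generic pair with getFirst()/getSecond() accessors and a natural ordering (Example 11 sorts a List of Pairs without an explicit Comparator):

import org.apache.mahout.common.Pair;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class PairDemo {
  public static void main(String[] args) {
    // Construct a pair and read back its two components.
    Pair<String, Integer> entry = new Pair<String, Integer>("apple", 3);
    System.out.println(entry.getFirst() + " -> " + entry.getSecond()); // apple -> 3

    // Pairs order themselves component-wise (first, then second), so a
    // list of pairs can be sorted directly, as Example 11 relies on.
    List<Pair<Integer, String>> list = new ArrayList<Pair<Integer, String>>();
    list.add(new Pair<Integer, String>(2, "b"));
    list.add(new Pair<Integer, String>(1, "a"));
    Collections.sort(list);
    System.out.println(list.get(0).getSecond()); // prints "a"
  }
}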

Example 1: loadResult

import org.apache.mahout.common.Pair; // import the required package/class
public double loadResult(Path outputDirPath, Configuration conf) throws IOException {
  Path finalNumberFile = new Path(outputDirPath, "part-r-00000");
  SequenceFileIterator<NullWritable, DoubleWritable> iterator =
      new SequenceFileIterator<NullWritable, DoubleWritable>(finalNumberFile, true, conf);
  double norm2;
  try {
    Pair<NullWritable, DoubleWritable> next = iterator.next();
    norm2 = next.getSecond().get();
    if (iterator.hasNext())
      throw new IOException("More than one value after norm2Job!");
  } finally {
    Closeables.close(iterator, false);
  }
  return norm2;
}
 
Developer: SiddharthMalhotra | Project: sPCA | Lines: 17 | Source: Norm2Job.java

Example 2: loadDictionary

import org.apache.mahout.common.Pair; // import the required package/class
private static String[] loadDictionary(String dictionaryPath, Configuration conf) {
  if (dictionaryPath == null) {
    return null;
  }
  Path dictionaryFile = new Path(dictionaryPath);
  List<Pair<Integer, String>> termList = Lists.newArrayList();
  int maxTermId = 0;
  // key is the word, value is its id
  for (Pair<Writable, IntWritable> record
          : new SequenceFileIterable<Writable, IntWritable>(dictionaryFile, true, conf)) {
    termList.add(new Pair<Integer, String>(record.getSecond().get(),
        record.getFirst().toString()));
    maxTermId = Math.max(maxTermId, record.getSecond().get());
  }
  String[] terms = new String[maxTermId + 1];
  for (Pair<Integer, String> pair : termList) {
    terms[pair.getFirst()] = pair.getSecond();
  }
  return terms;
}
 
Developer: saradelrio | Project: Chi-FRBCS-BigDataCS | Lines: 21 | Source: InMemoryCollapsedVariationalBayes0.java
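The two-pass pattern above (collect (id, term) pairs, track the maximum id, then fill a dense array indexed by id) works independently of Hadoop. A self-contained sketch, with hypothetical in-memory records standing in for the SequenceFile entries:

import org.apache.mahout.common.Pair;

import java.util.ArrayList;
import java.util.List;

public class DictionaryArrayDemo {
  public static void main(String[] args) {
    // Hypothetical (id, term) records in place of the SequenceFile entries.
    List<Pair<Integer, String>> termList = new ArrayList<Pair<Integer, String>>();
    termList.add(new Pair<Integer, String>(0, "alpha"));
    termList.add(new Pair<Integer, String>(2, "gamma"));
    termList.add(new Pair<Integer, String>(1, "beta"));

    // Pass 1: find the largest term id so the array can be sized.
    int maxTermId = 0;
    for (Pair<Integer, String> pair : termList) {
      maxTermId = Math.max(maxTermId, pair.getFirst());
    }

    // Pass 2: fill a dense array indexed by term id.
    String[] terms = new String[maxTermId + 1];
    for (Pair<Integer, String> pair : termList) {
      terms[pair.getFirst()] = pair.getSecond();
    }
    System.out.println(terms[2]); // prints "gamma"
  }
}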

Example 3: readPerplexity

import org.apache.mahout.common.Pair; // import the required package/class
/**
 * Reads the perplexity data written for one iteration and returns the perplexity
 * normalized by the model weight of the documents sampled during its computation,
 * or {@code Double.NaN} if no perplexity data exists for the given iteration.
 *
 * @param topicModelStateTemp directory holding the per-iteration model state
 * @param iteration the iteration whose perplexity should be read
 * @throws IOException
 */
public static double readPerplexity(Configuration conf, Path topicModelStateTemp, int iteration)
  throws IOException {
  Path perplexityPath = perplexityPath(topicModelStateTemp, iteration);
  FileSystem fs = FileSystem.get(perplexityPath.toUri(), conf);
  if (!fs.exists(perplexityPath)) {
    log.warn("Perplexity path {} does not exist, returning NaN", perplexityPath);
    return Double.NaN;
  }
  double perplexity = 0;
  double modelWeight = 0;
  long n = 0;
  for (Pair<DoubleWritable, DoubleWritable> pair : new SequenceFileDirIterable<DoubleWritable, DoubleWritable>(
      perplexityPath, PathType.LIST, PathFilters.partFilter(), null, true, conf)) {
    modelWeight += pair.getFirst().get();
    perplexity += pair.getSecond().get();
    n++;
  }
  log.info("Read {} entries with total perplexity {} and model weight {}", new Object[] { n,
          perplexity, modelWeight });
  return perplexity / modelWeight;
}
 
Developer: saradelrio | Project: Chi-FRBCS-BigDataCS | Lines: 30 | Source: CVB0Driver.java
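As a quick worked check of the aggregation above: if the part files contain the pairs (modelWeight = 2.0, perplexity = 10.0) and (modelWeight = 3.0, perplexity = 20.0), the method returns (10.0 + 20.0) / (2.0 + 3.0) = 6.0.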

Example 4: pruneVectors

import org.apache.mahout.common.Pair; // import the required package/class
public static void pruneVectors(Path tfDir, Path prunedTFDir, Path prunedPartialTFDir, long maxDF,
                                Configuration baseConf,
                                Pair<Long[], List<Path>> docFrequenciesFeatures,
                                float normPower,
                                boolean logNormalize,
                                int numReducers) throws IOException, InterruptedException, ClassNotFoundException {

  int partialVectorIndex = 0;
  List<Path> partialVectorPaths = new ArrayList<Path>();
  for (Path path : docFrequenciesFeatures.getSecond()) {
    Path partialVectorOutputPath = new Path(prunedPartialTFDir, "partial-" + partialVectorIndex++);
    partialVectorPaths.add(partialVectorOutputPath);
    pruneVectorsPartial(tfDir, partialVectorOutputPath, path, maxDF, baseConf);
  }

  mergePartialVectors(partialVectorPaths, prunedTFDir, baseConf, normPower, logNormalize, numReducers);
  HadoopUtil.delete(new Configuration(baseConf), prunedPartialTFDir);
}
 
Developer: saradelrio | Project: Chi-FRBCS-BigDataCS | Lines: 19 | Source: HighDFWordsPruner.java

Example 5: setup

import org.apache.mahout.common.Pair; // import the required package/class
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  URI[] localFiles = DistributedCache.getCacheFiles(conf);
  Preconditions.checkArgument(localFiles != null && localFiles.length >= 1,
          "missing paths from the DistributedCache");

  dimension = conf.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);
  sequentialAccess = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
  namedVector = conf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);
  maxNGramSize = conf.getInt(DictionaryVectorizer.MAX_NGRAMS, maxNGramSize);

  Path dictionaryFile = new Path(localFiles[0].getPath());
  // key is the word, value is its id
  for (Pair<Writable, IntWritable> record
          : new SequenceFileIterable<Writable, IntWritable>(dictionaryFile, true, conf)) {
    dictionary.put(record.getFirst().toString(), record.getSecond().get());
  }
}
 
Developer: saradelrio | Project: Chi-FRBCS-BigDataCS | Lines: 21 | Source: TFPartialVectorReducer.java

Example 6: calculateDF

import org.apache.mahout.common.Pair; // import the required package/class
/**
 * Calculates the document frequencies of all terms from the input set of vectors in
 * {@link SequenceFile} format. This job uses a fixed limit on the maximum memory used by the feature chunk
 * per node thereby splitting the process across multiple map/reduces.
 * 
 * @param input
 *          input directory of the vectors in {@link SequenceFile} format
 * @param output
 *          output directory where document frequencies will be stored
 * @param chunkSizeInMegabytes
 *          the size in MB of the feature => id chunk to be kept in memory at each node during Map/Reduce
 *          stage. It's recommended you calculate this based on the number of cores and the free
 *          memory available per node. Say you have 2 cores and around 1GB of memory to spare; we
 *          recommend a chunk size of around 400-500MB, so that two simultaneous reducers can create
 *          partial vectors without thrashing the system through increased swapping
 */
public static Pair<Long[],List<Path>> calculateDF(Path input,
                                                  Path output,
                                                  Configuration baseConf,
                                                  int chunkSizeInMegabytes)
  throws IOException, InterruptedException, ClassNotFoundException {

  if (chunkSizeInMegabytes < MIN_CHUNKSIZE) {
    chunkSizeInMegabytes = MIN_CHUNKSIZE;
  } else if (chunkSizeInMegabytes > MAX_CHUNKSIZE) { // 10GB
    chunkSizeInMegabytes = MAX_CHUNKSIZE;
  }

  Path wordCountPath = new Path(output, WORDCOUNT_OUTPUT_FOLDER);

  startDFCounting(input, wordCountPath, baseConf);

  return createDictionaryChunks(wordCountPath, output, baseConf, chunkSizeInMegabytes);
}
 
Developer: saradelrio | Project: Chi-FRBCS-BigDataCS | Lines: 35 | Source: TFIDFConverter.java
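The sizing advice in the javadoc can be expressed as a small helper. This is only a sketch of the recommendation, not Mahout code; the helper name and the constant values are illustrative (the example above references MIN_CHUNKSIZE and MAX_CHUNKSIZE without showing them):

public final class ChunkSizing {
  private static final int MIN_CHUNKSIZE = 100;    // illustrative lower bound, in MB
  private static final int MAX_CHUNKSIZE = 10240;  // 10GB, matching the comment above

  /** Hypothetical helper: divide the spare memory across the cores that will
   *  each hold one feature => id chunk, then clamp to the same bounds that
   *  calculateDF enforces. */
  public static int suggestChunkSizeMB(int coresPerNode, int freeMemoryMB) {
    int perCore = freeMemoryMB / Math.max(1, coresPerNode);
    return Math.min(MAX_CHUNKSIZE, Math.max(MIN_CHUNKSIZE, perCore));
  }

  public static void main(String[] args) {
    // 2 cores and ~1GB to spare => 512MB chunks, in line with the 400-500MB advice.
    System.out.println(suggestChunkSizeMB(2, 1024));
  }
}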

Example 7: setup

import org.apache.mahout.common.Pair; // import the required package/class
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  URI[] localFiles = DistributedCache.getCacheFiles(conf);
  Preconditions.checkArgument(localFiles != null && localFiles.length >= 1, 
      "missing paths from the DistributedCache");

  vectorCount = conf.getLong(TFIDFConverter.VECTOR_COUNT, 1);
  featureCount = conf.getLong(TFIDFConverter.FEATURE_COUNT, 1);
  minDf = conf.getInt(TFIDFConverter.MIN_DF, 1);
  maxDf = conf.getLong(TFIDFConverter.MAX_DF, -1);
  sequentialAccess = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
  namedVector = conf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);

  Path dictionaryFile = new Path(localFiles[0].getPath());
  // key is feature, value is the document frequency
  for (Pair<IntWritable,LongWritable> record 
       : new SequenceFileIterable<IntWritable,LongWritable>(dictionaryFile, true, conf)) {
    dictionary.put(record.getFirst().get(), record.getSecond().get());
  }
}
 
Developer: saradelrio | Project: Chi-FRBCS-BigDataCS | Lines: 23 | Source: TFIDFPartialVectorReducer.java

Example 8: setup

import org.apache.mahout.common.Pair; // import the required package/class
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  URI[] localFiles = DistributedCache.getCacheFiles(conf);
  Preconditions.checkArgument(localFiles != null && localFiles.length >= 1,
          "missing paths from the DistributedCache");

  maxDf = conf.getLong(HighDFWordsPruner.MAX_DF, -1);

  Path dictionaryFile = new Path(localFiles[0].getPath());
  // key is feature, value is the document frequency
  for (Pair<IntWritable, LongWritable> record :
          new SequenceFileIterable<IntWritable, LongWritable>(dictionaryFile, true, conf)) {
    dictionary.put(record.getFirst().get(), record.getSecond().get());
  }
}
 
Developer: saradelrio | Project: Chi-FRBCS-BigDataCS | Lines: 18 | Source: WordsPrunerReducer.java

Example 9: processOutput

import org.apache.mahout.common.Pair; // import the required package/class
protected RuleBase processOutput(JobContext job, Path outputPath) throws IOException {
 
  Configuration conf = job.getConfiguration();

  FileSystem fs = outputPath.getFileSystem(conf);

  Path[] outfiles = Chi_RWCSUtils.listOutputFiles(fs, outputPath);
  
  RuleBase ruleBase = null;
  
  // read all the outputs
  for (Path path : outfiles) {
    for (Pair<LongWritable,RuleBase> record : new SequenceFileIterable<LongWritable, RuleBase>(path, conf)) {
      if (ruleBase == null) {
        ruleBase = record.getSecond();
      }
    }
  }
  
  return ruleBase;
}
 
Developer: saradelrio | Project: Chi-FRBCS-BigDataCS | Lines: 22 | Source: PartialBuilder.java

Example 10: computeNext

import org.apache.mahout.common.Pair; // import the required package/class
@Override
protected Pair<K,V> computeNext() {
  if (!reuseKeyValueInstances || value == null) {
    key = ReflectionUtils.newInstance(keyClass, conf);
    if (!noValue) {
      value = ReflectionUtils.newInstance(valueClass, conf);
    }
  }
  try {
    boolean available;
    if (noValue) {
      available = reader.next(key);
    } else {
      available = reader.next(key, value);
    }
    if (!available) {
      close();
      return null;
    }
    return new Pair<K,V>(key, value);
  } catch (IOException ioe) {
    close();
    throw new IllegalStateException(ioe);
  }
}
 
Developer: saradelrio | Project: Chi-FRBCS-BigDataCS | Lines: 26 | Source: SequenceFileIterator.java

Example 11: StableFixedSizeSamplingIterator

import org.apache.mahout.common.Pair; // import the required package/class
public StableFixedSizeSamplingIterator(int size, Iterator<T> source) {
  List<Pair<Integer,T>> buf = Lists.newArrayListWithCapacity(size);
  int sofar = 0;
  Random random = RandomUtils.getRandom();
  while (source.hasNext()) {
    T v = source.next();
    sofar++;
    if (buf.size() < size) {
      buf.add(new Pair<Integer,T>(sofar, v));
    } else {
      int position = random.nextInt(sofar);
      if (position < buf.size()) {
        buf.set(position, new Pair<Integer,T>(sofar, v));
      }
    }
  }

  Collections.sort(buf);
  delegate = Iterators.transform(buf.iterator(),
                                 new Function<Pair<Integer,T>,T>() {
                                   @Override
                                   public T apply(Pair<Integer,T> from) {
                                     return from.getSecond();
                                   }
                                 });
}
 
Developer: saradelrio | Project: Chi-FRBCS-BigDataCS | Lines: 27 | Source: StableFixedSizeSamplingIterator.java
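Example 11 is the classic reservoir-sampling algorithm, with each element's original position retained so that, after sorting, the sample preserves the source order (hence "stable"). A usage sketch with hypothetical data, assuming the class exposes the standard Iterator<T> interface through its delegate:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class SamplingDemo {
  public static void main(String[] args) {
    // A stream of 100 integers to sample from.
    List<Integer> source = new ArrayList<Integer>();
    for (int i = 1; i <= 100; i++) {
      source.add(i);
    }

    // Draw a uniform random sample of 10 elements, kept in stream order.
    Iterator<Integer> sample =
        new StableFixedSizeSamplingIterator<Integer>(10, source.iterator());
    while (sample.hasNext()) {
      System.out.print(sample.next() + " ");
    }
  }
}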

Example 12: move

import org.apache.mahout.common.Pair; // import the required package/class
public static void move(MyVectorInput input, MyVectorOutput output) throws IOException {

    // move the dictionary
    Pair<String, Integer> dictEntry;
    int terms = 0;
    int vectors = 0;

    while ((dictEntry = input.nextDictEntry()) != null) {
        output.writeDictEntry(dictEntry);
        terms++;
    }

    System.out.println("Moved " + terms + " dict terms");

    // move the vectors
    MySparseVector vector;
    while ((vector = input.nextVector()) != null) {
        output.writeVector(vector);
        vectors++;
    }

    output.close();

    System.out.println("Moved " + vectors + " document vectors");
}
 
Developer: project-asap | Project: IReS-Platform | Lines: 25 | Source: Mover.java

Example 13: popMin

import org.apache.mahout.common.Pair; // import the required package/class
private static Pair<Integer, Double> popMin(MySparseVector v) {
    // locate the entry with the smallest index in the (unsorted) sparse vector
    int minIndexIndex = -1;
    int minIndexValue = Integer.MAX_VALUE;
    double minValue = -1;

    for (int i = 0; i < v.size(); i++) {
        if (minIndexValue > v.indices.get(i)) {
            minIndexIndex = i;
            minIndexValue = v.indices.get(i);
            minValue = v.values.get(i);
        }
    }

    // remove that entry and return it as an (index, value) pair
    v.indices.remove(minIndexIndex);
    v.values.remove(minIndexIndex);
    return new Pair<Integer, Double>(minIndexValue, minValue);
}
 
Developer: project-asap | Project: IReS-Platform | Lines: 18 | Source: ArffOutput.java
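Repeatedly popping the minimum index amounts to a selection sort over the vector's indices, which suggests popMin is used to emit a sparse vector's entries in ascending index order. A hypothetical driver loop, assuming size() reflects the number of remaining entries:

// Drain the vector in ascending index order.
while (v.size() > 0) {
    Pair<Integer, Double> entry = popMin(v);
    System.out.println(entry.getFirst() + ":" + entry.getSecond());
}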

Example 14: nextDictEntry

import org.apache.mahout.common.Pair; // import the required package/class
@Override
public Pair<String, Integer> nextDictEntry() throws IOException {
    String line, term;
    while ((line = reader.readLine()) != null) {
        if (!line.startsWith("@attribute")) {
            if (line.startsWith("@data")) break; // end of dictionary
            continue;
        }
        if (line.contains("@@[email protected]@")) continue;

        // handle the new term
        termCount++; // new term

        // strip the term
        int beginningOfTerm = 11; // "@attribute" is 10 chars long; strip it and the following whitespace
        int endOfTerm = line.lastIndexOf("numeric") - 1;
        term = line.substring(beginningOfTerm, endOfTerm);
        // return the new term with its id
        return new Pair<String, Integer>(term, termCount);
    }
    return null;
}
 
Developer: project-asap | Project: IReS-Platform | Lines: 23 | Source: ArffInput.java
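For instance, given the header line "@attribute price numeric", the substring arithmetic above extracts the bare term:

// line            = "@attribute price numeric"
// beginningOfTerm = 11                                 ("@attribute" plus one space)
// endOfTerm       = line.lastIndexOf("numeric") - 1 = 16 (the space before "numeric")
// term            = line.substring(11, 16) = "price"
// result          = new Pair<String, Integer>("price", termCount)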

Example 15: nextVector

import org.apache.mahout.common.Pair; // import the required package/class
@Override
public MySparseVector nextVector() throws IOException {
   
    if (!vecIterator.hasNext()) return null;

    Pair<Text, VectorWritable> entry = vecIterator.next();

    String name = entry.getFirst().toString(); // the vector's key; read but currently unused
    VectorWritable mahoutVector = entry.getSecond();

    ArrayList<Integer> indices = new ArrayList<Integer>();
    ArrayList<Double> values = new ArrayList<Double>();

    // copy only the nonzero elements into parallel index/value lists
    for (Element e : mahoutVector.get().all()) {
        double value = e.get();
        if (value == 0) continue;
        values.add(value);
        indices.add(e.index());
    }

    return new MySparseVector(indices, values);
    
}
 
Developer: project-asap | Project: IReS-Platform | Lines: 25 | Source: MahoutInput.java


Note: The org.apache.mahout.common.Pair class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their developers; copyright in the source code remains with the original authors, and distribution and use must comply with each project's license. Do not repost without permission.