This article collects typical usage examples of the Java method org.apache.hadoop.io.Text.toString. If you are wondering what Text.toString does, how to call it, or where it is used in practice, the curated code samples below should help. You can also read further about its containing class, org.apache.hadoop.io.Text.
Below, 15 code examples of Text.toString are shown, sorted by popularity by default.
Example 1: WritableSortable
import org.apache.hadoop.io.Text; // import the package/class this method depends on

public WritableSortable(int j) throws IOException {
  seed = r.nextLong();
  r.setSeed(seed);
  Text t = new Text();
  StringBuilder sb = new StringBuilder();
  indices = new int[j];
  offsets = new int[j];
  check = new String[j];
  DataOutputBuffer dob = new DataOutputBuffer();
  // Serialize j random Text values into one buffer, remembering each record's
  // starting offset and its expected String form for later verification.
  for (int i = 0; i < j; ++i) {
    indices[i] = i;
    offsets[i] = dob.getLength();
    genRandom(t, r.nextInt(15) + 1, sb);
    t.write(dob);
    check[i] = t.toString();
  }
  eob = dob.getLength();
  bytes = dob.getData();
  comparator = WritableComparator.get(Text.class);
}
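Because the records were written with Text.write, the captured WritableComparator can compare them directly in serialized form. A minimal sketch, assuming the fields built above; compareRecords is a hypothetical helper, not part of the original class:

// Record i occupies bytes[offsets[i] .. next offset); the last record ends at eob.
int compareRecords(int i, int j) {
  int li = ((i + 1 < offsets.length) ? offsets[i + 1] : eob) - offsets[i];
  int lj = ((j + 1 < offsets.length) ? offsets[j + 1] : eob) - offsets[j];
  return comparator.compare(bytes, offsets[i], li, bytes, offsets[j], lj);
}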
Example 2: map
import org.apache.hadoop.io.Text; // import the package/class this method depends on

@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  String doc = value.toString();
  String text = slice(doc, "<text", "</text>", true);
  if (text.length() < 1) return;
  // Lower-case the article body and blank out every non-letter character.
  char[] txt = text.toLowerCase().toCharArray();
  for (int i = 0; i < txt.length; ++i) {
    if (!((txt[i] >= 'a' && txt[i] <= 'z') || (txt[i] >= 'A' && txt[i] <= 'Z'))) {
      txt[i] = ' ';
    }
  }
  String id = slice(doc, "<id>", "</id>", false);
  if (id.length() < 1) return;
  StringTokenizer itr = new StringTokenizer(String.valueOf(txt));
  int sum = itr.countTokens(); // total number of words in this document
  while (itr.hasMoreTokens()) {
    String s = itr.nextToken();
    word.set(id + '-' + s);
    IntWritable[] tmp = {new IntWritable(sum), new IntWritable(1)};
    IntArrayWritable temp = new IntArrayWritable(tmp);
    context.write(word, temp);
  }
}
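IntArrayWritable is not a Hadoop class and its definition is not shown here; a minimal sketch of what such a wrapper typically looks like (the no-arg constructor is required so Hadoop can instantiate it during deserialization):

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;

public class IntArrayWritable extends ArrayWritable {
  public IntArrayWritable() {
    super(IntWritable.class); // declares the element type for deserialization
  }

  public IntArrayWritable(IntWritable[] values) {
    super(IntWritable.class, values);
  }
}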
Example 3: reduce
import org.apache.hadoop.io.Text; // import the package/class this method depends on

public void reduce(IntWritable key, Iterator<Text> values,
                   OutputCollector<Text, Text> out,
                   Reporter reporter) throws IOException {
  keyVal = key.get();
  while (values.hasNext()) {
    Text value = values.next();
    String towrite = value.toString() + "\n";
    indexStream.write(towrite.getBytes(Charsets.UTF_8));
    written++;
    if (written > numIndexes - 1) {
      // after every numIndexes (1000) entries, report status and write a master-index entry
      reporter.setStatus("Creating index for archives");
      reporter.progress();
      endIndex = keyVal;
      String masterWrite = startIndex + " " + endIndex + " " + startPos
          + " " + indexStream.getPos() + " \n";
      outStream.write(masterWrite.getBytes(Charsets.UTF_8));
      startPos = indexStream.getPos();
      startIndex = endIndex;
      written = 0;
    }
  }
}
Example 4: map
import org.apache.hadoop.io.Text; // import the package/class this method depends on

@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  String line = value.toString();
  String year = line.substring(15, 19);
  int airTemperature;
  if (line.charAt(87) == '+') { // parseInt doesn't like leading plus signs
    airTemperature = Integer.parseInt(line.substring(88, 92));
  } else {
    airTemperature = Integer.parseInt(line.substring(87, 92));
  }
  String quality = line.substring(92, 93);
  if (airTemperature != MISSING && quality.matches("[01459]")) {
    context.write(new Text(year), new IntWritable(airTemperature));
  }
}
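This mapper is the NCDC max-temperature example popularized by "Hadoop: The Definitive Guide", where MISSING is conventionally 9999. A minimal driver sketch under that assumption; the class names MaxTemperature, MaxTemperatureMapper and MaxTemperatureReducer are placeholders, not names from the snippet:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public static void main(String[] args) throws Exception {
  Job job = Job.getInstance(new Configuration(), "max temperature");
  job.setJarByClass(MaxTemperature.class);          // hypothetical driver class
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setMapperClass(MaxTemperatureMapper.class);   // the mapper above (name assumed)
  job.setReducerClass(MaxTemperatureReducer.class); // a max-finding reducer (assumed)
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}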
Example 5: map
import org.apache.hadoop.io.Text; // import the package/class this method depends on

/**
 * Emits 3 key-value pairs for counting the word, its length, and the
 * square of its length. Outputs are (Text, LongWritable).
 *
 * @param value
 *          This will be a line of text coming in from our input file.
 */
public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException {
  StringTokenizer itr = new StringTokenizer(value.toString());
  while (itr.hasMoreTokens()) {
    String string = itr.nextToken();
    this.wordLen.set(string.length());
    // the square of an integer is an integer...
    this.wordLenSq.set((long) Math.pow(string.length(), 2.0));
    context.write(LENGTH, this.wordLen);
    context.write(SQUARE, this.wordLenSq);
    context.write(COUNT, ONE);
  }
}
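A reducer that sums the LongWritable values per key (equivalent to Hadoop's built-in org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer) is the natural companion; the standard deviation of word length then follows from sqrt(SQUARE/COUNT - (LENGTH/COUNT)^2) over the aggregated totals. A minimal sketch:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public static class SumReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
  private final LongWritable result = new LongWritable();

  @Override
  public void reduce(Text key, Iterable<LongWritable> values, Context context)
      throws IOException, InterruptedException {
    long sum = 0;
    for (LongWritable v : values) {
      sum += v.get(); // COUNT, LENGTH and SQUARE totals accumulate independently, one per key
    }
    result.set(sum);
    context.write(key, result);
  }
}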
Example 6: setRenewer
import org.apache.hadoop.io.Text; // import the package/class this method depends on

public void setRenewer(Text renewer) {
  if (renewer == null) {
    this.renewer = new Text();
  } else {
    // Normalize the renewer to the short name of its Kerberos principal.
    HadoopKerberosName renewerKrbName = new HadoopKerberosName(renewer.toString());
    try {
      this.renewer = new Text(renewerKrbName.getShortName());
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
}
Example 7: reduce
import org.apache.hadoop.io.Text; // import the package/class this method depends on

public void reduce(Text key, Iterator<Text> values,
                   OutputCollector<Text, Text> output, Reporter reporter)
    throws IOException {
  String keyStr = key.toString() + this.fieldSeparator;
  while (values.hasNext()) {
    FieldSelectionHelper helper = new FieldSelectionHelper();
    helper.extractOutputKeyValue(keyStr, values.next().toString(),
        fieldSeparator, reduceOutputKeyFieldList,
        reduceOutputValueFieldList, allReduceValueFieldsFrom, false, false);
    output.collect(helper.getKey(), helper.getValue());
  }
}
Example 8: map
import org.apache.hadoop.io.Text; // import the package/class this method depends on

public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException {
  StringTokenizer itr = new StringTokenizer(value.toString());
  while (itr.hasMoreTokens()) {
    word.set(itr.nextToken());
    context.write(word, one);
  }
}
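The word and one fields are not shown in the snippet; in the canonical WordCount mapper they are declared once per instance so the objects are reused across records:

private final static IntWritable one = new IntWritable(1);
private Text word = new Text();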
Example 9: map
import org.apache.hadoop.io.Text; // import the package/class this method depends on

/**
 * Emits a key-value pair for counting the word. Outputs are (IntWritable,
 * IntWritable).
 *
 * @param value
 *          This will be a line of text coming in from our input file.
 */
public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException {
  StringTokenizer itr = new StringTokenizer(value.toString());
  while (itr.hasMoreTokens()) {
    String string = itr.nextToken();
    length.set(string.length());
    context.write(length, ONE);
  }
}
Example 10: map
import org.apache.hadoop.io.Text; // import the package/class this method depends on

public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException {
  StringTokenizer itr = new StringTokenizer(value.toString());
  while (itr.hasMoreTokens()) {
    word.set(itr.nextToken());
    context.write(word, one);
  }
}
Example 11: map
import org.apache.hadoop.io.Text; // import the package/class this method depends on

public void map(K key, Text value,
                OutputCollector<Text, LongWritable> output,
                Reporter reporter)
    throws IOException {
  String text = value.toString();
  Matcher matcher = pattern.matcher(text);
  // Emit a count of 1 for the chosen capture group of every regex match.
  while (matcher.find()) {
    output.collect(new Text(matcher.group(group)), new LongWritable(1));
  }
}
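The pattern and group fields are configured outside the snippet. In Hadoop's old-API RegexMapper, which this method mirrors, they are initialized from the job configuration roughly as below; the configuration key names are assumptions based on classic Hadoop and may differ by version:

import java.util.regex.Pattern;
import org.apache.hadoop.mapred.JobConf;

private Pattern pattern;
private int group;

public void configure(JobConf job) {
  pattern = Pattern.compile(job.get("mapred.mapper.regex")); // regex to match (key name assumed)
  group = job.getInt("mapred.mapper.regex.group", 0);        // capture group to emit (key name assumed)
}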
Example 12: map
import org.apache.hadoop.io.Text; // import the package/class this method depends on

@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
  String line = key.toString();
  String valueNumber = value.toString();
  // Split the key at the first comma: emit (id, rest-of-key + "," + value).
  int id_position = line.indexOf(",");
  context.write(new Text(line.substring(0, id_position)),
      new Text(line.substring(id_position + 1) + "," + valueNumber));
}
Example 13: testConvertText
import org.apache.hadoop.io.Text; // import the package/class this method depends on

@Test
public void testConvertText() {
  Text t = new Text("abc".getBytes());
  String s = t.toString();
  Text t1 = new Text(s);
  assertEquals(t, t1);
}
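Note that "abc".getBytes() uses the platform default charset, while Text always interprets its byte array as UTF-8; the round trip above succeeds only because "abc" is pure ASCII. A variant that pins the charset explicitly, offered as a safer-usage sketch rather than a change to the original test:

import java.nio.charset.StandardCharsets;

Text t = new Text("abc".getBytes(StandardCharsets.UTF_8)); // bytes are guaranteed UTF-8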
Example 14: map
import org.apache.hadoop.io.Text; // import the package/class this method depends on

@Override
public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException {
  StringTokenizer itr = new StringTokenizer(value.toString());
  while (itr.hasMoreTokens()) {
    word.set(itr.nextToken());
    context.write(word, one);
  }
}
Example 15: map
import org.apache.hadoop.io.Text; // import the package/class this method depends on

@Override
protected void map(twoDimensionIndexWritable key, Text value, Context context) throws IOException, InterruptedException {
  int m = key.getM();
  int oldTopic = key.getTopic();
  String word = value.toString();
  int index = wordToIndex.get(word);
  // Remove the word's current topic assignment from both count matrices.
  docToTopic.getValue(m).setValue(oldTopic, docToTopic.getValue(m).getValue(oldTopic) - 1);
  topicToWord.getValue(oldTopic).setValue(index, topicToWord.getValue(oldTopic).getValue(index) - 1);
  // Full conditional p(z = k | ...) of the collapsed Gibbs sampler,
  // with Dirichlet smoothing parameters alpha and beta.
  double[] p = new double[K];
  for (int k = 0; k < K; k++) {
    p[k] = (docToTopic.getValue(m).getValue(k) + alpha) / (MatrixOperations.sumByRow(docToTopic).getValue(m) + K * alpha)
        * (topicToWord.getValue(k).getValue(index) + beta) / (MatrixOperations.sumByRow(topicToWord).getValue(k) + V * beta);
  }
  // Turn p into a running cumulative sum.
  for (int k = 1; k < K; k++) {
    p[k] += p[k - 1];
  }
  double u = Math.random() * p[K - 1]; // p[] is unnormalised
  int newTopic;
  // Given this word's (unnormalised) distribution over all K topics, sample a
  // new topic for it by simple inverse-CDF random sampling.
  for (newTopic = 0; newTopic < K; newTopic++) {
    if (u < p[newTopic]) {
      break;
    }
  }
  // Add the word back under its newly sampled topic.
  docToTopic.getValue(m).setValue(newTopic, docToTopic.getValue(m).getValue(newTopic) + 1);
  topicToWord.getValue(newTopic).setValue(index, topicToWord.getValue(newTopic).getValue(index) + 1);
  key.setMatrixKind(MatrixKind.Corpus);
  key.setTopic(newTopic);
  context.write(key, value);
}
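The sampling loop above is inverse-CDF sampling over unnormalised weights: p is turned into a running cumulative sum, u is drawn uniformly from [0, p[K-1]), and the first topic whose cumulative weight exceeds u is chosen. The same step as a self-contained helper (a hypothetical method, not part of the original class):

// Draw an index k with probability p[k] / sum(p), where p holds unnormalised weights.
static int sampleFromUnnormalised(double[] p) {
  double[] cdf = p.clone();
  for (int k = 1; k < cdf.length; k++) {
    cdf[k] += cdf[k - 1]; // running cumulative sum
  }
  double u = Math.random() * cdf[cdf.length - 1]; // uniform draw over the total weight
  for (int k = 0; k < cdf.length; k++) {
    if (u < cdf[k]) {
      return k;
    }
  }
  return cdf.length - 1; // guard against floating-point rounding at the boundary
}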