

Java TextDelimited Class Code Examples

This article collects typical usage examples of the Java class cascading.scheme.hadoop.TextDelimited. If you are wondering what TextDelimited is for, how to use it, or what it looks like in real code, the curated class examples below should help.


The TextDelimited class belongs to the cascading.scheme.hadoop package. Twelve code examples are shown below, ordered by popularity.
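Before the numbered examples, here is a minimal sketch of how TextDelimited is typically wired into a Cascading Hadoop flow: the scheme describes how lines are parsed, an Hfs tap binds it to a path, and a pipe connects source to sink. The class name TextDelimitedCopy and both paths are placeholders, not code from any of the projects below.

import java.util.Properties;

import cascading.flow.FlowDef;
import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.pipe.Pipe;
import cascading.property.AppProps;
import cascading.scheme.hadoop.TextDelimited;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;

public class TextDelimitedCopy {

	public static void main(String[] args) {
		Properties properties = new Properties();
		AppProps.setApplicationJarClass(properties, TextDelimitedCopy.class);

		// hasHeader = true: field names come from the first line of the source,
		// and a header line is written to the sink
		Tap inTap = new Hfs(new TextDelimited(true, "\t"), "input/path.tsv");
		Tap outTap = new Hfs(new TextDelimited(true, "\t"), "output/path", SinkMode.REPLACE);

		Pipe copyPipe = new Pipe("copy");
		FlowDef flowDef = FlowDef.flowDef().addSource(copyPipe, inTap).addTailSink(copyPipe, outTap);

		new HadoopFlowConnector(properties).connect(flowDef).complete();
	}
}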

Example 1: main

import cascading.scheme.hadoop.TextDelimited; // import the required package/class
public static void main(String... args) {

		String inPath = args[0];
		String outPath = args[1];

		Properties properties = new Properties();
		AppProps.setApplicationJarClass(properties, Main.class);
		HadoopFlowConnector flowConnector = new HadoopFlowConnector(properties);

		// create the source tap
		Tap inTap = new Hfs(new TextDelimited(true, "\t"), inPath);

		// create the sink tap
		Tap outTap = new Hfs(new TextDelimited(true, "\t"), outPath);

		// specify a pipe to connect the taps
		Pipe copyPipe = new Pipe("copy");

		// connect the taps, pipes, etc., into a flow
		FlowDef flowDef = FlowDef.flowDef().addSource(copyPipe, inTap).addTailSink(copyPipe, outTap);

		// run the flow
		flowConnector.connect(flowDef).complete();
	}
 
Developer: xuzhikethinker | Project: t4f-data | Lines: 25 | Source: Main.java
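A note on the constructor used here (my reading of the TextDelimited API, so treat the details as an assumption): the boolean argument controls header handling, while other overloads take explicit Fields instead.

// hasHeader = true: the first line of the source names the fields, and the sink
// writes a header line back out.
TextDelimited headered = new TextDelimited(true, "\t");

// With explicit Fields (as in Examples 6-8 below), no header line is needed;
// columns are mapped to the given names positionally.
TextDelimited positional = new TextDelimited(new Fields("id", "name"), "\t");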

Example 2: testWhenExtraColumnsNotStrict

import cascading.scheme.hadoop.TextDelimited; // import the required package/class
@Test
public void testWhenExtraColumnsNotStrict() throws Exception {
  String sourcePath = "src/test/resources/input/with-extra-columns.txt";
  String sinkPath = "src/test/resources/input/sink-with-headers";
  String expectedPath = "src/test/resources/expected/with-extra-columns-no-strict.txt";
  String trapPath = "src/test/resources/input/trap-sink-with-headers";
  String expectedTrapPath = "src/test/resources/expected/trap-with-extra-columns-no-strict.txt";

  FlowConnector connector = new Hadoop2MR1FlowConnector();
  CSVFormat sourceFormat = CSVFormat.newFormat('\t')
    .withQuote('"')
    .withHeader("id", "first name", "last name", "city", "zip")
    .withEscape('\\')
    .withRecordSeparator('\n');

  CSVFormat sinkFormat = CSVFormat.newFormat('\t')
    .withSkipHeaderRecord()
    .withEscape('\\')
    .withRecordSeparator('\n');

  Tap source = new Hfs(new CsvScheme(sourceFormat, false), sourcePath);
  Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath, SinkMode.REPLACE);
  Tap trap = new Hfs(new TextDelimited(true, "\t"), trapPath, SinkMode.REPLACE);

  Pipe pipe = new Pipe("pipe");

  connector.connect("extra-columns-not-strict", source, sink, trap, pipe).complete();

  testPaths(sinkPath, expectedPath);
  testPaths(trapPath, expectedTrapPath);
}
 
Developer: datascienceinc | Project: cascading.csv | Lines: 32 | Source: CsvSchemeTest.java

Example 3: testWhenExtraColumnsNotStrictNoHeaders

import cascading.scheme.hadoop.TextDelimited; // import the required package/class
@Test
public void testWhenExtraColumnsNotStrictNoHeaders() throws Exception {
  String sourcePath = "src/test/resources/input/with-extra-columns-no-header.txt";
  String sinkPath = "src/test/resources/input/sink-no-headers";
  String trapPath = "src/test/resources/input/trap-no-headers";
  String expectedPath = "src/test/resources/expected/with-extra-columns-no-strict-no-header.txt";
  String expectedTrapPath = "src/test/resources/expected/trap-with-extra-columns-no-strict-no-header.txt";

  FlowConnector connector = new Hadoop2MR1FlowConnector();
  CSVFormat sourceFormat = CSVFormat.newFormat('\t')
    .withQuote('"')
    .withEscape('\\')
    .withRecordSeparator('\n');

  CSVFormat sinkFormat = CSVFormat.newFormat('\t')
    .withEscape('\\')
    .withRecordSeparator('\n');

  Tap source = new Hfs(new CsvScheme(sourceFormat, false), sourcePath);
  Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath, SinkMode.REPLACE);
  Tap trap = new Hfs(new TextDelimited(false, "\t"), trapPath, SinkMode.REPLACE);

  Pipe pipe = new Pipe("pipe");

  connector.connect("test-extra-columns-no-header", source, sink, trap, pipe).complete();
  testPaths(sinkPath, expectedPath);
  testPaths(trapPath, expectedTrapPath);
}
 
Developer: datascienceinc | Project: cascading.csv | Lines: 29 | Source: CsvSchemeTest.java
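Examples 2 and 3 both pass a second boolean to CsvScheme. Judging from the test names and the trap wiring, my assumption is that this is a strict flag: with strict = false, rows whose column count does not match the declared schema are diverted to the trap tap instead of failing the flow, which is why each test asserts on both the sink and the trap output.

// Assumption: CsvScheme(format, strict) with strict = false tolerates malformed
// rows; Cascading routes the offending records to the flow's trap tap.
Tap lenientSource = new Hfs(new CsvScheme(sourceFormat, false), sourcePath);
Tap trap = new Hfs(new TextDelimited(true, "\t"), trapPath, SinkMode.REPLACE);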

Example 4: main

import cascading.scheme.hadoop.TextDelimited; // import the required package/class
public static void main(String[] args) {

		if (args.length < 2) {
			throw new IllegalArgumentException("Please specify input and output paths as arguments.");
		}

		Fields token = new Fields( "token", String.class );
		Fields text = new Fields( "text" );
		RegexSplitGenerator splitter = new RegexSplitGenerator( token, "\\s+" );
		// only returns "token"
		Pipe docPipe = new Each( "token", text, splitter, Fields.RESULTS );

		Pipe wcPipe = new Pipe( "wc", docPipe );
		wcPipe = new AggregateBy( wcPipe, token, new CountBy(new Fields("count")));

		Tap inTap = new Hfs(new TextDelimited(text, "\n" ), args[0]);
		Tap outTap = new Hfs(new TextDelimited(false, "\n"), args[1], SinkMode.REPLACE);

		FlowDef flowDef = FlowDef.flowDef().setName( "wc" )
				.addSource( docPipe, inTap )
				.addTailSink( wcPipe, outTap );

		FlowConnector flowConnector = new FlinkConnector();

		Flow wcFlow = flowConnector.connect( flowDef );

		wcFlow.complete();
	}
 
Developer: dataArtisans | Project: cascading-flink | Lines: 29 | Source: WordCount.java
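Incidentally, CountBy is itself an AggregateBy subassembly, so the wrapping above can likely be collapsed (an untested shorthand, based on the CountBy(Pipe, Fields, Fields) constructor):

// Groups on "token" and emits a "count" field, same as the AggregateBy form above.
wcPipe = new CountBy(wcPipe, token, new Fields("count"));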

Example 5: main

import cascading.scheme.hadoop.TextDelimited; // import the required package/class
public static void main(String[] args) {
  String salesPath = args[0];
  String storePath = args[1];
  String outPath = args[2];
  String date = "2452229";

  Properties properties = new Properties();
  AppProps.setApplicationJarClass(properties, Main.class);
  HadoopFlowConnector flowConnector = new HadoopFlowConnector(properties);

  Tap salesTap = new Hfs(new ORCFile(null, "0,7"), salesPath);
  Tap storeTap = new Hfs(new AvroScheme(), storePath);
  Tap outTap = new Hfs(new TextDelimited(true, "\t"), outPath);

  Pipe salesPipe = new Each("sales", new Fields("solddatesk"), new DateFilter(Integer.valueOf(date)));
  Pipe storePipe = new Pipe("store");
  Pipe joinPipe = new HashJoin(salesPipe, new Fields("storesk"), storePipe, new Fields("storeSk"));

  // _col24 is state_name
  Pipe countPipe = new CountBy(joinPipe, new Fields("state"),
      new Fields("item_count"));

  FlowDef flowDef = FlowDef.flowDef().setName("count")
      .addSource(salesPipe, salesTap)
      .addSource(storePipe, storeTap)
      .addTailSink(countPipe, outTap);
      //.addTailSink(joinPipe, outTap);

  Flow countFlow = flowConnector.connect(flowDef);
  countFlow.complete();
}
 
Developer: cartershanklin | Project: orcfile-demos | Lines: 32 | Source: Main.java

Example 6: main

import cascading.scheme.hadoop.TextDelimited; // import the required package/class
public static void main(String[] args) {
  String fooInputPath = args[0];
  String barInputPath = args[1];
  String outputPath = args[2];
  int fooValMax = Integer.parseInt(args[3]);
  int joinValMax = Integer.parseInt(args[4]);
  int numberOfReducers = Integer.parseInt(args[5]);

  Properties properties = new Properties();
  AppProps.setApplicationJarClass(properties,
      JoinFilterExampleCascading.class);
  properties.setProperty("mapred.reduce.tasks", Integer.toString(numberOfReducers));
  properties.setProperty("mapreduce.job.reduces", Integer.toString(numberOfReducers));
  
  SpillableProps props = SpillableProps.spillableProps()
      .setCompressSpill( true )
      .setMapSpillThreshold( 50 * 1000 );

  HadoopFlowConnector flowConnector = new HadoopFlowConnector(properties);

  // create source and sink taps
  Fields fooFields = new Fields("fooId", "fooVal", "foobarId");
  Tap fooTap = new Hfs(new TextDelimited(fooFields, "|"), fooInputPath);
  Fields barFields = new Fields("barId", "barVal");
  Tap barTap = new Hfs(new TextDelimited(barFields, "|"), barInputPath);

  Tap outputTap = new Hfs(new TextDelimited(false, "|"), outputPath);

  Fields joinFooFields = new Fields("foobarId");
  Fields joinBarFields = new Fields("barId");

  Pipe fooPipe = new Pipe("fooPipe");
  Pipe barPipe = new Pipe("barPipe");

  Pipe fooFiltered = new Each(fooPipe, fooFields, new FooFilter(fooValMax));

  Pipe joinedPipe = new HashJoin(fooFiltered, joinFooFields, barPipe,
      joinBarFields);
  props.setProperties( joinedPipe.getConfigDef(), Mode.REPLACE );

  Fields joinFields = new Fields("fooId", "fooVal", "foobarId", "barVal");
  Pipe joinedFilteredPipe = new Each(joinedPipe, joinFields,
      new JoinedFilter(joinValMax));

  FlowDef flowDef = FlowDef.flowDef().setName("wc")
      .addSource(fooPipe, fooTap).addSource(barPipe, barTap)
      .addTailSink(joinedFilteredPipe, outputTap);

  Flow wcFlow = flowConnector.connect(flowDef);
  wcFlow.writeDOT("dot/wc.dot");
  wcFlow.complete();
}
 
Developer: amitchmca | Project: hadooparchitecturebook | Lines: 56 | Source: JoinFilterExampleCascading.java

Example 7: sourceTap

import cascading.scheme.hadoop.TextDelimited; // import the required package/class
private Tap sourceTap() {
    return new Hfs(new TextDelimited(new Fields("line")), INPUT);
}
 
Developer: xushjie1987 | Project: es-hadoop-v2.2.0 | Lines: 4 | Source: AbstractCascadingHadoopJsonSaveTest.java

Example 8: sourceTap

import cascading.scheme.hadoop.TextDelimited; // import the required package/class
private Tap sourceTap() {
    return new Hfs(new TextDelimited(new Fields("id", "name", "url", "picture", "ts")), INPUT);
}
 
Developer: xushjie1987 | Project: es-hadoop-v2.2.0 | Lines: 4 | Source: AbstractCascadingHadoopSaveTest.java

Example 9: getDelimitedFile

import cascading.scheme.hadoop.TextDelimited; // import the required package/class
@Override
public Tap getDelimitedFile(Fields fields, boolean hasHeader, String delimiter, String quote,
							Class[] types, String filename, SinkMode mode) {
	return new Hfs( new TextDelimited( fields, hasHeader, delimiter, quote, types ), filename, mode );
}
 
Developer: dataArtisans | Project: cascading-flink | Lines: 6 | Source: FlinkTestPlatform.java
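A hypothetical call site for this factory method; the field names, column types, and path are invented for illustration:

// platform is assumed to be a FlinkTestPlatform instance
Tap usersTap = platform.getDelimitedFile(
		new Fields("id", "name"), true, ",", "\"",
		new Class[]{ int.class, String.class },
		"data/users.csv", SinkMode.KEEP);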

Example 10: sourceTap

import cascading.scheme.hadoop.TextDelimited; // import the required package/class
private Tap sourceTap() {
    return new Hfs(new TextDelimited(new Fields("id", "name", "url", "picture", "ts", "tag")), INPUT);
}
 
Developer: elastic | Project: elasticsearch-hadoop | Lines: 4 | Source: AbstractCascadingHadoopSaveTest.java

Example 11: tsv

import cascading.scheme.hadoop.TextDelimited; // import the required package/class
static protected Scheme tsv(Fields fields) {
  TextDelimited scheme = new TextDelimited(fields, true, true, "\t");
  scheme.setNumSinkParts(10);
  return scheme;
}
 
Developer: vijaykramesh | Project: sponges_and_filters | Lines: 6 | Source: TapFactory.java
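A hypothetical use of this factory: because setNumSinkParts(10) is baked into the returned scheme, any sink tap built from it writes at most ten part files. The field names and path below are placeholders.

// Sink tap built from the shared tsv() factory; output is capped at 10 part files.
Tap sink = new Hfs(tsv(new Fields("user_id", "total")), "output/counts", SinkMode.REPLACE);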

Example 12: main

import cascading.scheme.hadoop.TextDelimited; // import the required package/class
static
public void main(String... args) throws Exception {

	if(args.length != 3){
		System.err.println("Usage: hadoop jar job.jar <PMML file> <HFS source> <HFS sink>");

		System.exit(-1);
	}

	Evaluator evaluator = PMMLPlannerUtil.createEvaluator(new File(args[0]));

	Properties properties = new Properties();

	AppProps.setApplicationJarClass(properties, Main.class);

	FlowConnector connector = new HadoopFlowConnector(properties);

	FlowDef flowDef = FlowDef.flowDef();

	Tap source = new Hfs(new TextDelimited(true, ","), args[1]);
	flowDef = flowDef.addSource("input", source);

	Tap sink = new Hfs(new TextDelimited(true, ","), args[2]);
	flowDef = flowDef.addSink("output", sink);

	PMMLPlanner pmmlPlanner = new PMMLPlanner(evaluator);
	pmmlPlanner.setRetainOnlyActiveFields();
	pmmlPlanner.setHeadName("input");
	pmmlPlanner.setTailName("output");

	flowDef = flowDef.addAssemblyPlanner(pmmlPlanner);

	Flow<?> flow = connector.connect(flowDef);

	flow.complete();
}
 
Developer: jpmml | Project: jpmml-cascading | Lines: 37 | Source: Main.java


Note: The cascading.scheme.hadoop.TextDelimited class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from community open-source projects; copyright in the source code remains with the original authors. Refer to each project's license before using or redistributing the code; do not republish without permission.