

Java DataSource.flatMap Method Code Examples

This article collects typical usage examples of the Java method org.apache.flink.api.java.operators.DataSource.flatMap, drawn from open-source projects. If you are wondering what DataSource.flatMap does, how to use it, or want to see it in context, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.flink.api.java.operators.DataSource.


Below are 15 code examples of DataSource.flatMap, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Java code examples.
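
Before diving into the collected examples, here is a minimal, self-contained sketch of what DataSource.flatMap does. It mirrors the classic Flink DataSet WordCount; the class and variable names are illustrative and not taken from the projects quoted below:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class FlatMapSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // fromElements and readTextFile both return a DataSource<String>
        DataSource<String> lines = env.fromElements("to be or not to be");

        // flatMap turns each input record into zero or more output records
        DataSet<Tuple2<String, Integer>> words = lines.flatMap(
                new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String line, Collector<Tuple2<String, Integer>> out) {
                        for (String token : line.toLowerCase().split("\\s+")) {
                            out.collect(new Tuple2<>(token, 1));
                        }
                    }
                });

        words.groupBy(0).sum(1).print();
    }
}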

Example 1: start

import org.apache.flink.api.java.operators.DataSource; // import the package/class the method depends on
public static void start( MachineLearningDefinienListConfig config ){
    LOG.info("Start machine learning approach for listing identifier-definien pairs");
    // first, create a flink environment
    ExecutionEnvironment flinkEnv = ExecutionEnvironment.getExecutionEnvironment();
    flinkEnv.setParallelism( config.getParallelism() );

    LOG.debug("Read wikidump via flink");
    DataSource<String> dataSource = FlinkMlpRelationFinder.readWikiDump( config, flinkEnv );

    LOG.debug("Parse documents via flink");
    FlatMapOperator<String, RawWikiDocument> mapOperator = dataSource.flatMap(new TextExtractorMapper());

    LOG.debug("Open text annotator mapper");
    TextAnnotatorMapper annotatorMapper = new TextAnnotatorMapper(config);
    // Strangely, the ML approach does not create a PosTagger at this point, so open the annotator manually here.
    annotatorMapper.open(null);
    DataSet<ParsedWikiDocument> parsedDocuments = mapOperator.map( annotatorMapper );

    LOG.debug("Create feature Extractor without Gouldi");
    CreateCandidatesMapper candidatesMapper = new CreateCandidatesMapper(config);
    DataSet<WikiDocumentOutput> outputDataSet = parsedDocuments.map( candidatesMapper );

    LOG.debug("Map to output format.");
    RelationMapper outputMapper = new RelationMapper();
    DataSet<LinkedList<String[]>> outputs = outputDataSet.map(outputMapper);

    Path outputPath = Paths.get(config.getOutputDir(), OUTPUT_FILE_NAME);
    LOG.info("Write output file " + outputPath.toString() );
    outputs.writeAsFormattedText(
        outputPath.toString(),
        FileSystem.WriteMode.OVERWRITE,
        new OutputFormatter()
    ).setParallelism(1);

    try {
        flinkEnv.execute();
    } catch (Exception e) {
        LOG.error("Error due execution of flink process.", e);
    }
}
 
Developer: ag-gipp, Project: mathosphere, Lines: 41, Source: MachineLearningRelationExtractor.java

Example 2: main

import org.apache.flink.api.java.operators.DataSource; // import the package/class the method depends on
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment
				.getExecutionEnvironment();

		DataSource<String> inputNodesAndValue = env.readTextFile(argPathToNodesAndValues);

		DataSource<String> inputIndex = env.readTextFile(argPathToIndex);

		DataSet<Tuple2<String, Long>> nodes = inputIndex
				.flatMap(new NodeReader());

		/* Convert the input to (node, value) pairs */
		DataSet<Tuple2<Long, Double>> nodesAndValue = inputNodesAndValue.flatMap(new ValueReader());		

		// Emit (1, ID, value) so that groupBy(0) forms a single global group
		DataSet<Tuple3<Long, Long, Double>> topKMapper = nodesAndValue
				.flatMap(new TopKMapper());

		// Get topK
		DataSet<Tuple3<Long, Long, Double>> topKReducer = topKMapper.groupBy(0)
				.sortGroup(2, Order.DESCENDING).first(topK);

		// Join node IDs with node names
		DataSet<Tuple2<String, Double>> topKwithName = topKReducer.join(nodes)
				.where(1).equalTo(1).flatMap(new ProjectNodeWithName());

		topKwithName.writeAsCsv(argPathOut, WriteMode.OVERWRITE);

		env.execute();
	}
 
Developer: HungUnicorn, Project: aim3project.graph, Lines: 36, Source: TopKName.java
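
None of the helper classes in this example are shown in the snippet. As one illustration, here is a hypothetical sketch of TopKMapper, inferred from the "Emit (1, ID, value)" comment above: prefixing every record with the constant key 1L puts all records into a single group, so groupBy(0).sortGroup(2, DESCENDING).first(topK) yields a global top-K. The actual implementation in aim3project.graph may differ:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.util.Collector;

// Hypothetical sketch of TopKMapper for (node, value) inputs
public class TopKMapper implements
        FlatMapFunction<Tuple2<Long, Double>, Tuple3<Long, Long, Double>> {
    @Override
    public void flatMap(Tuple2<Long, Double> nodeAndValue,
                        Collector<Tuple3<Long, Long, Double>> out) {
        // Constant key 1L collapses everything into one group for a global top-K
        out.collect(new Tuple3<>(1L, nodeAndValue.f0, nodeAndValue.f1));
    }
}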

Example 3: main

import org.apache.flink.api.java.operators.DataSource; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
	if (!parseParameters(args)) {
		return;
	}

	ExecutionEnvironment env = ExecutionEnvironment
			.getExecutionEnvironment();
	
	DataSource<String> inputArc = env
			.readTextFile(argPathToArc);

	/* Convert the input to arcs, consisting of (source, target) */
	DataSet<Tuple2<Long, Long>> arcs = inputArc.flatMap(new ArcReader());

	DataSet<Tuple3<Long, Long, Double>> srcIncMat = arcs.map(
			new SourceIncMatrix()).name("S(G)");

	srcIncMat.writeAsCsv(argPathOut, "\n", "\t",
			FileSystem.WriteMode.OVERWRITE);

	env.execute();		
}
 
Developer: HungUnicorn, Project: aim3project.graph, Lines: 23, Source: SourceIncidence.java
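
ArcReader, used here and in several later examples, is not shown in these snippets. A plausible minimal sketch, assuming a whitespace-separated "source target" line format (an assumption; the real parser in aim3project.graph may use another delimiter or handle malformed lines differently):

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Hypothetical sketch of an ArcReader-style line parser
public class ArcReader implements FlatMapFunction<String, Tuple2<Long, Long>> {
    @Override
    public void flatMap(String line, Collector<Tuple2<Long, Long>> out) {
        if (line.isEmpty() || line.startsWith("#")) {
            return; // skip blank lines and comments
        }
        String[] tokens = line.split("\\s+");
        out.collect(new Tuple2<>(Long.parseLong(tokens[0]), Long.parseLong(tokens[1])));
    }
}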

Example 4: main

import org.apache.flink.api.java.operators.DataSource; // import the package/class the method depends on
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment
				.getExecutionEnvironment();

		DataSource<String> input = env.readTextFile(argPathToArc);

		/* Convert the input to edges, consisting of (source, target) */
		DataSet<Tuple2<Long, Long>> edges = input.flatMap(new EdgeReader());

		/* Create a dataset of all vertex ids and count them */
		DataSet<Long> numVertices = edges.<Tuple1<Long>> project(0)
				.union(edges.<Tuple1<Long>> project(1)).distinct()
				.reduceGroup(new CountVertices());

		/* Compute the degree of every vertex */
		DataSet<Tuple2<Long, Long>> verticesWithDegree = edges
				.<Tuple1<Long>> project(0)
				// out-degree differs from in-degree only in the projected field:
				// project field 0 here and group by source
				.groupBy(0).reduceGroup(new DegreeOfVertex());

		/* Compute the degree distribution */
		DataSet<Tuple2<Long, Double>> degreeDistribution = verticesWithDegree
				.groupBy(1).reduceGroup(new DistributionElement())
				.withBroadcastSet(numVertices, "numVertices");

		degreeDistribution.writeAsCsv(argPathOut,
				FileSystem.WriteMode.OVERWRITE);

		env.execute();
	}
 
Developer: HungUnicorn, Project: aim3project.graph, Lines: 37, Source: OutDegreeDistribution.java

Example 5: main

import org.apache.flink.api.java.operators.DataSource; // import the package/class the method depends on
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment
				.getExecutionEnvironment();

		DataSource<String> input = env.readTextFile(argPathToArc);

		/* Convert the input to edges, consisting of (source, target) */
		DataSet<Tuple2<Long, Long>> edges = input.flatMap(new EdgeReader());

		/* Create a dataset of all vertex ids and count them */
		DataSet<Long> numVertices = edges.<Tuple1<Long>>project(0)
				.union(edges.<Tuple1<Long>>project(1)).distinct()
				.reduceGroup(new CountVertices());

		/* Compute the degree of every vertex */
		DataSet<Tuple2<Long, Long>> verticesWithDegree = edges.<Tuple1<Long>>project(1)
		// in-degree differs from out-degree only in projecting field 1 (group by target)
				.groupBy(0).reduceGroup(new DegreeOfVertex());

		/* Compute the degree distribution */
		DataSet<Tuple2<Long, Double>> degreeDistribution = verticesWithDegree
				.groupBy(1).reduceGroup(new DistributionElement())
				.withBroadcastSet(numVertices, "numVertices");

		degreeDistribution.writeAsCsv(argPathOut,
				FileSystem.WriteMode.OVERWRITE);

		env.execute();
	}
 
Developer: HungUnicorn, Project: aim3project.graph, Lines: 35, Source: InDegreeDistribution.java

Example 6: main

import org.apache.flink.api.java.operators.DataSource; // import the package/class the method depends on
public static void main(String[] args) throws Exception {

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSource<String> input = env.readTextFile(Config.pathToSlashdotZoo());

    /* Convert the input to edges, consisting of (source, target, isFriend ) */
    DataSet<Tuple3<Long, Long, Boolean>> edges = input.flatMap(new EdgeReader());

    /* Create a dataset of all vertex ids and count them */
    DataSet<Long> numVertices =
        edges.project(0).types(Long.class)
            .union(edges.project(1).types(Long.class))
            .distinct().reduceGroup(new CountVertices());

    /* Compute the degree of every vertex */
    DataSet<Tuple2<Long, Long>> verticesWithDegree =
        edges.project(0).types(Long.class)
             .groupBy(0).reduceGroup(new DegreeOfVertex());

    /* Compute the degree distribution */
    DataSet<Tuple2<Long, Double>> degreeDistribution =
        verticesWithDegree.groupBy(1).reduceGroup(new DistributionElement())
                                     .withBroadcastSet(numVertices, "numVertices");

    degreeDistribution.writeAsText(Config.outputPath(), FileSystem.WriteMode.OVERWRITE);

    env.execute();
  }
 
Developer: sscdotopen, Project: aim3, Lines: 30, Source: OutDegreeDistribution.java
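
A note on the projection style here: edges.project(0).types(Long.class) is an older form of the Flink DataSet projection API, whereas Examples 4 and 5 use a type-hint form. Assuming edges is the DataSet<Tuple3<Long, Long, Boolean>> from the example above, the type-hint equivalent would be as follows (which form compiles depends on the Flink version; that boundary is an assumption, so check the release notes):

// Type-hint projection, equivalent to edges.project(0).types(Long.class)
DataSet<Tuple1<Long>> sources = edges.<Tuple1<Long>>project(0);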

Example 7: extractRedirectMappings

import org.apache.flink.api.java.operators.DataSource; // import the package/class the method depends on
public static DataSet<RedirectMapping> extractRedirectMappings(ExecutionEnvironment env, DataSource<String> wikiDump) {
    return wikiDump.flatMap(new FlatMapFunction<String, RedirectMapping>() {
        @Override
        public void flatMap(String content, Collector<RedirectMapping> out) throws Exception {
            Pattern pattern = Pattern.compile(REGEX, Pattern.DOTALL);

            Matcher m = pattern.matcher(content);
            // if the record does not contain parsable page-xml
            if (!m.find()) return;

            // otherwise create a WikiDocument object from the xml
            WikiDocument doc = new WikiDocument();

            doc.setId(Integer.parseInt(m.group(3)));
            doc.setTitle(WikiSimStringUtils.unescapeEntities(m.group(1)));
            doc.setNS(Integer.parseInt(m.group(2)));

            if (doc.getNS() != 0) return;

            Pattern redirect = Pattern.compile("<redirect title=\"(.+?)\"", Pattern.CASE_INSENSITIVE);
            Matcher mr = redirect.matcher(content);

            if (!mr.find()) return;

            out.collect(new RedirectMapping(
                    doc.getTitle(),
                    WikiSimStringUtils.unescapeEntities(mr.group(1))
            ));
        }
    });
}
 
Developer: wikimedia, Project: citolytics, Lines: 32, Source: RedirectExtractor.java
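
A hedged usage sketch for this extractor (the input path is a placeholder, and the call assumes the method lives in RedirectExtractor, as the source file name suggests):

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// Placeholder path to a Wikipedia page-XML dump
DataSource<String> wikiDump = env.readTextFile("hdfs:///path/to/wikidump.xml");
DataSet<RedirectMapping> redirects = RedirectExtractor.extractRedirectMappings(env, wikiDump);
redirects.print();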

Example 8: extractIdTitleMapping

import org.apache.flink.api.java.operators.DataSource; // import the package/class the method depends on
public static DataSet<IdTitleMapping> extractIdTitleMapping(ExecutionEnvironment env, DataSource<String> wikiDump) {
    return wikiDump.flatMap(new FlatMapFunction<String, IdTitleMapping>() {
        @Override
        public void flatMap(String s, Collector<IdTitleMapping> out) throws Exception {
            DocumentProcessor dp = new DocumentProcessor();
            WikiDocument doc = dp.processDoc(s);

            if(doc != null) {
                out.collect(new IdTitleMapping(doc.getId(), doc.getTitle()));
            }
        }
    });
}
 
Developer: wikimedia, Project: citolytics, Lines: 14, Source: IdTitleMappingExtractor.java

Example 9: main

import org.apache.flink.api.java.operators.DataSource; // import the package/class the method depends on
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment
				.getExecutionEnvironment();		

		DataSource<String> inputArc = env
				.readTextFile(argPathToArc);

		/* Convert the input to arcs, consisting of (source, target) */
		DataSet<Tuple2<Long, Long>> arcs = inputArc.flatMap(new ArcReader());

		DataSet<Tuple3<Long, Long, Double>> tarIncMat = arcs.map(
				new TargetIncMatrix()).name("T(G)");

		tarIncMat.writeAsCsv(argPathOut, "\n", "\t",
				FileSystem.WriteMode.OVERWRITE);
		
		env.execute();
		
	}
 
Developer: HungUnicorn, Project: aim3project.graph, Lines: 25, Source: TargetIncidence.java

Example 10: main

import org.apache.flink.api.java.operators.DataSource; // import the package/class the method depends on
public static void main(String... args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		// set up execution environment
		ExecutionEnvironment env = ExecutionEnvironment
				.getExecutionEnvironment();

		DataSource<String> inputArc = env
				.readTextFile(edgesPath);

		DataSource<String> inputIndex = env.readTextFile(verticesPath);

		DataSet<Long> vertices = inputIndex.flatMap(new NodeReader());

		/* Convert the input to edges, consisting of (source, target) */
		DataSet<Tuple2<Long, Long>> arcs = inputArc.flatMap(new ArcReader());

		// Undirected graph (arc becomes edge)
		DataSet<Tuple2<Long, Long>> edges = arcs.flatMap(new UndirectEdge())
				.distinct();

		// assign the initial components (equal to the vertex id)
		DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
				.map(new DuplicateValue<Long>());

		// Open a delta iteration
		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithInitialId
				.iterateDelta(verticesWithInitialId, maxIterations, 0);

		// Apply the step logic: join with the edges, select the minimum
		// neighbor, update if the component of the candidate is smaller
		DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
				.join(edges).where(0).equalTo(0)
				.with(new NeighborWithComponentIDJoin()).groupBy(0)
				.aggregate(Aggregations.MIN, 1)
				.join(iteration.getSolutionSet()).where(0).equalTo(0)
				.with(new ComponentIdFilter());

		// close the delta iteration (delta and new workset are identical)
		DataSet<Tuple2<Long, Long>> vertexWithComponentID = iteration
				.closeWith(changes, changes);
		
		// Number of distinct components
		DataSet<Long> numComponent = vertexWithComponentID.<Tuple1<Long>>project(1).distinct().reduceGroup(new CountComponent());

		/* Compute the size of every component, emit (Component size, 1) */
		DataSet<Tuple2<Long, Long>> ComponentCount = vertexWithComponentID
				.<Tuple1<Long>>project(1).groupBy(0)
				.reduceGroup(new ComponentCount()).flatMap(new ComponentMap());

		DataSet<Tuple2<Long, Long>> ComponentDistribution = ComponentCount
				.groupBy(0).aggregate(Aggregations.SUM, 1);

		// Emit result
		if (fileOutput) {
			ComponentDistribution.writeAsCsv(outputPath, "\n", " ",
					FileSystem.WriteMode.OVERWRITE);
			//numComponent.print();
		} else {
			numComponent.print();
			ComponentDistribution.print();
		}
		
		env.execute("Weakly Connected Components");
	}
 
Developer: HungUnicorn, Project: aim3project.graph, Lines: 69, Source: WeakConnectedComponents.java
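
The UndirectEdge flatMap above turns each directed arc into an undirected edge before distinct() removes duplicates. A minimal sketch of such a function (an assumption; the project's actual implementation is not shown on this page):

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Hypothetical sketch: emit each arc in both directions so the subsequent
// distinct() yields an undirected edge set
public class UndirectEdge implements FlatMapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>> {
    @Override
    public void flatMap(Tuple2<Long, Long> arc, Collector<Tuple2<Long, Long>> out) {
        out.collect(arc);
        out.collect(new Tuple2<>(arc.f1, arc.f0));
    }
}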

Example 11: main

import org.apache.flink.api.java.operators.DataSource; // import the package/class the method depends on
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment
				.getExecutionEnvironment();

		DataSource<String> inputArc = env.readTextFile(argPathToArc);

		DataSource<String> inputIndex = env.readTextFile(argPathToIndex);

		DataSet<Tuple2<String, Long>> nodes = inputIndex
				.flatMap(new NodeReader());

		/* Convert the input to edges, consisting of (source, target) */
		DataSet<Tuple2<Long, Long>> arcs = inputArc.flatMap(new ArcReader());

		/* Compute the degree of every vertex */
		DataSet<Tuple2<Long, Long>> verticesWithDegree = arcs
				.<Tuple1<Long>> project(0).groupBy(0)
				.reduceGroup(new DegreeOfVertex());

		// Keep only the nodes whose degree is higher than the average degree
		DataSet<Tuple2<Long, Long>> highOutDegree = verticesWithDegree
				.filter(new DegreeFilter());

		// Emit (1, ID, degree) so that groupBy(0) forms a single global group
		DataSet<Tuple3<Long, Long, Long>> topKMapper = highOutDegree
				.flatMap(new TopKMapper());

		// Get topK
		DataSet<Tuple3<Long, Long, Long>> topKReducer = topKMapper.groupBy(0)
				.sortGroup(2, Order.DESCENDING).first(topK);

		// Join node IDs with node names
		DataSet<Tuple2<String, Long>> topKwithName = topKReducer.join(nodes)
				.where(1).equalTo(1).flatMap(new ProjectNodeWithName());

		topKwithName.writeAsCsv(argPathOut, WriteMode.OVERWRITE);

		env.execute();
	}
 
Developer: HungUnicorn, Project: aim3project.graph, Lines: 45, Source: TopKOutDegree.java

Example 12: main

import org.apache.flink.api.java.operators.DataSource; // import the package/class the method depends on
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment
				.getExecutionEnvironment();

		DataSource<String> inputArc = env.readTextFile(argPathToArc);

		DataSource<String> inputIndex = env.readTextFile(argPathToIndex);

		DataSet<Tuple2<String, Long>> nodes = inputIndex
				.flatMap(new NodeReader());

		/* Convert the input to edges, consisting of (source, target) */
		DataSet<Tuple2<Long, Long>> arcs = inputArc.flatMap(new ArcReader());

		/* Compute the degree of every vertex */
		DataSet<Tuple2<Long, Long>> verticesWithDegree = arcs
				.<Tuple1<Long>> project(1).groupBy(0)
				.reduceGroup(new DegreeOfVertex());

		// Keep only the nodes whose degree is higher than a given threshold
		DataSet<Tuple2<Long, Long>> highOutDegree = verticesWithDegree
				.filter(new DegreeFilter());

		// Emit (1, ID, degree) so that groupBy(0) forms a single global group
		DataSet<Tuple3<Long, Long, Long>> topKMapper = highOutDegree
				.flatMap(new TopKMapper());

		// Get topK
		DataSet<Tuple3<Long, Long, Long>> topKReducer = topKMapper.groupBy(0)
				.sortGroup(2, Order.DESCENDING).first(topK);

		// Join node IDs with node names
		DataSet<Tuple2<String, Long>> topKwithName = topKReducer.join(nodes)
				.where(1).equalTo(1).flatMap(new ProjectNodeWithName());

		topKwithName.writeAsCsv(argPathOut, WriteMode.OVERWRITE);

		env.execute();
	}
 
Developer: HungUnicorn, Project: aim3project.graph, Lines: 45, Source: TopKInDegree.java

Example 13: main

import org.apache.flink.api.java.operators.DataSource; // import the package/class the method depends on
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment
				.getExecutionEnvironment();

		// Read the input files - pages and links
		DataSource<String> inputPages = env.readTextFile(argPathToIndex);
		DataSet<Tuple1<Long>> pages = inputPages.flatMap(new PageReader());

		DataSource<String> inputLinks = env.readTextFile(argPathToArc);
		DataSet<Tuple2<Long, Long>> links = inputLinks
				.flatMap(new LinkReader());

		// Get the total count of pages
		DataSet<Long> numPages = pages.reduceGroup(new CountPages());

		// Find sinks
		DataSet<Tuple1<Long>> noOutgoingLinks = pages.flatMap(new FindSinks())
				.withBroadcastSet(links.<Tuple1<Long>> project(0).distinct(),
						"pages");

		// Point sinks to all other nodes
		DataSet<Tuple2<Long, Long>> sinksToAll = noOutgoingLinks.flatMap(
				new PointToAllOther()).withBroadcastSet(pages, "pages");

		// Assign the initial rank to every page - 1 / numPages
		DataSet<Tuple2<Long, Double>> pagesRanked = pages.map(
				new InitialRanking()).withBroadcastSet(numPages, "numPages");

		// Encode sparse adjacency matrix to a list
		DataSet<Tuple2<Long, Long[]>> sparseMatrix = links.union(sinksToAll)
				.groupBy(0).reduceGroup(new BuildList());

		// Start iteration - Not using DeltaIteration since the whole DataSet is
		// recomputed
		IterativeDataSet<Tuple2<Long, Double>> iterationSet = pagesRanked
				.iterate(maxIterations);

		// Iteratively join the iterationSet with the sparseMatrix
		DataSet<Tuple2<Long, Double>> pageRank = iterationSet
				.join(sparseMatrix).where(0)
				.equalTo(0)
				.flatMap(new DistributePageRank())
				.groupBy(0)
				.sum(1)
				// To implement the random teleport behaviour, recompute each
				// PageRank as beta * pageRank + ((1 - beta) / numPages)
				.map(new RandomTeleport())
				.withBroadcastSet(numPages, "numPages");

		DataSet<Tuple2<Long, Double>> results = iterationSet.closeWith(
				pageRank, pageRank.join(iterationSet).where(0).equalTo(0)
						.filter(new ConvergenceCondition()));

		results.writeAsCsv(argPathOut, WriteMode.OVERWRITE);

		env.execute();
	}
 
Developer: HungUnicorn, Project: aim3project.graph, Lines: 67, Source: PageRank.java
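
DistributePageRank above consumes the default join result of (page, rank) with the adjacency list and emits one rank share per neighbor. A hedged sketch of what such a flatMap could look like, with the tuple layout inferred from the surrounding code (the actual class is not shown on this page):

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Hypothetical sketch: a join without .with(...) yields Tuple2<left, right>;
// split the page's current rank evenly across its outgoing neighbors
public class DistributePageRank implements
        FlatMapFunction<Tuple2<Tuple2<Long, Double>, Tuple2<Long, Long[]>>, Tuple2<Long, Double>> {
    @Override
    public void flatMap(Tuple2<Tuple2<Long, Double>, Tuple2<Long, Long[]>> joined,
                        Collector<Tuple2<Long, Double>> out) {
        double rank = joined.f0.f1;
        Long[] neighbors = joined.f1.f1;
        for (Long target : neighbors) {
            out.collect(new Tuple2<>(target, rank / neighbors.length));
        }
    }
}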

Example 14: main

import org.apache.flink.api.java.operators.DataSource; // import the package/class the method depends on
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment
				.getExecutionEnvironment();

		// Read the input files - pages and links
		DataSource<String> inputPages = env.readTextFile(argPathToIndex);
		DataSet<Tuple1<Long>> pages = inputPages.flatMap(new PageReader());

		DataSource<String> inputLinks = env.readTextFile(argPathToArc);
		DataSet<Tuple2<Long, Long>> links = inputLinks
				.flatMap(new LinkReader());

		// Get the total count of pages
		DataSet<Long> numPages = pages.reduceGroup(new CountPages());

		// Find sinks
		DataSet<Tuple1<Long>> noOutgoingLinks = pages.flatMap(new FindSinks())
				.withBroadcastSet(
						links.<Tuple1<Long>>project(0).distinct(), "pages");

		// Point sinks to all other nodes
		DataSet<Tuple2<Long, Long>> sinksToAll = noOutgoingLinks.flatMap(
				new PointToAllOther()).withBroadcastSet(pages, "pages");

		// Assign the initial rank to every page - 1 / numPages
		DataSet<Tuple2<Long, Double>> pagesRanked = pages.map(
				new InitialRanking()).withBroadcastSet(numPages, "numPages");

		// Encode sparse adjacency matrix to a list
		DataSet<Tuple2<Long, Long[]>> sparseMatrix = links.union(sinksToAll)
				.groupBy(0).reduceGroup(new BuildList());

		// Start iteration - Not using DeltaIteration since the whole DataSet is
		// recomputed
		IterativeDataSet<Tuple2<Long, Double>> iterationSet = pagesRanked
				.iterate(maxIterations);

		// Iteratively join the iterationSet with the sparseMatrix
		DataSet<Tuple2<Long, Double>> pageRank = iterationSet
				.join(sparseMatrix).where(0)
				.equalTo(0)
				.flatMap(new DistributePageRank())
				.groupBy(0)
				.sum(1)
				// To implement the random teleport behaviour, recompute each
				// PageRank as beta * pageRank + ((1 - beta) / numPages)
				.map(new RandomTeleport())
				.withBroadcastSet(numPages, "numPages");

		DataSet<Tuple2<Long, Double>> resultsPageRank = iterationSet.closeWith(
				pageRank, pageRank.join(iterationSet).where(0).equalTo(0)
						.filter(new ConvergenceCondition()));

		DataSet<Tuple2<Long, Double>> filterPageRank = resultsPageRank
				.filter(new TopKFilter());

		// Emit (1,node,PageRank)
		DataSet<Tuple3<Long, Long, Double>> mapPageRank = filterPageRank
				.flatMap(new TopKMapper());

		DataSet<Tuple2<Long, Double>> results = mapPageRank.groupBy(0)
				.sortGroup(2, Order.DESCENDING).first(topK).<Tuple2<Long, Double>>project(1, 2);

		results.writeAsCsv(argPathOut, WriteMode.OVERWRITE);

		env.execute();
	}
 
Developer: HungUnicorn, Project: aim3project.graph, Lines: 77, Source: TopKPageRank.java

Example 15: main

import org.apache.flink.api.java.operators.DataSource; // import the package/class the method depends on
public static void main(String[] args) throws Exception {

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSource<String> input = env.readTextFile(Config.pathToTrainingSet());

    // read input with df-cut
    DataSet<Tuple3<String, String, Long>> labeledTerms = input.flatMap(new DataReader());

    // conditional counter per word per label
    DataSet<Tuple3<String, String, Long>> termCounts = null; // IMPLEMENT ME

    termCounts.writeAsCsv(Config.pathToConditionals(), "\n", "\t", FileSystem.WriteMode.OVERWRITE);

    // word counts per label
    DataSet<Tuple2<String, Long>> termLabelCounts = null; // IMPLEMENT ME

    termLabelCounts.writeAsCsv(Config.pathToSums(), "\n", "\t", FileSystem.WriteMode.OVERWRITE);

    env.execute();
  }
 
Developer: sscdotopen, Project: aim3, Lines: 22, Source: Training.java
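
The two // IMPLEMENT ME placeholders above are exercise stubs from the aim3 course repository; as written, the program would fail with a NullPointerException. One plausible completion, assuming labeledTerms carries (term, label, count) tuples as produced by DataReader (an assumption; the intended exercise solution may differ):

// Hypothetical completion of the exercise stubs
DataSet<Tuple3<String, String, Long>> termCounts =
        labeledTerms.groupBy(0, 1).sum(2);

DataSet<Tuple2<String, Long>> termLabelCounts =
        labeledTerms.<Tuple2<String, Long>>project(1, 2)
                    .groupBy(0)
                    .sum(1);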


Note: The org.apache.flink.api.java.operators.DataSource.flatMap examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by many developers, and the copyright of the source code remains with the original authors. Please consult each project's license before using or redistributing the code; do not republish without permission.