This article collects typical usage examples of the Java method wherehows.common.DatasetPath.separatedDataset. If you have been wondering what DatasetPath.separatedDataset does, how to use it, or where to find examples of it, the curated samples below should help. You can also read further about the enclosing class, wherehows.common.DatasetPath.
The following presents 4 code examples of DatasetPath.separatedDataset, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java examples.
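Judging from the tests shown below (the method's own source is not reproduced on this page), separatedDataset takes a comma-separated list of dataset paths and splits it into individual paths, while treating commas inside curly-brace groups such as {2015/10/20,2017} as part of a single path rather than as separators.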
Example 1: separatedDataset
import wherehows.common.DatasetPath; // import the package/class the method depends on
@Test
public void separatedDataset() {
  String sample = "/jobs/search/search-metrics/core-metrics/sessions/daily/{2015/10/20,2017}/desktop, /data/something/{1232,422}, /qfewfs/afasd/zxc";
  List<String> result = DatasetPath.separatedDataset(sample);
  String[] expectedResult = {"/jobs/search/search-metrics/core-metrics/sessions/daily/{2015/10/20,2017}/desktop",
      "/data/something/{1232,422}", "/qfewfs/afasd/zxc"};
  Assert.assertEquals(result.size(), expectedResult.length); // guard against missing or extra splits
  for (int i = 0; i < result.size(); i++) {
    Assert.assertEquals(result.get(i), expectedResult[i]);
  }
}
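The DatasetPath implementation itself is not shown on this page, but a minimal brace-aware splitter that satisfies both tests here might look like the sketch below. This is an illustration only, not the actual WhereHows code; the class and method names (BraceAwareSplitter, splitOutsideBraces) are made up for the example.

import java.util.ArrayList;
import java.util.List;

public class BraceAwareSplitter {

  // Split on commas at brace depth zero, so commas inside {...} groups
  // stay attached to their path. Hypothetical stand-in for
  // DatasetPath.separatedDataset.
  public static List<String> splitOutsideBraces(String input) {
    List<String> parts = new ArrayList<>();
    StringBuilder current = new StringBuilder();
    int depth = 0; // current curly-brace nesting depth
    for (char c : input.toCharArray()) {
      if (c == '{') {
        depth++;
      } else if (c == '}') {
        depth--;
      }
      if (c == ',' && depth == 0) {
        parts.add(current.toString().trim());
        current.setLength(0);
      } else {
        current.append(c);
      }
    }
    if (current.length() > 0) {
      parts.add(current.toString().trim());
    }
    return parts;
  }

  public static void main(String[] args) {
    // Prints: [/data/something/{1232,422}, /qfewfs/afasd/zxc]
    System.out.println(splitOutsideBraces("/data/something/{1232,422}, /qfewfs/afasd/zxc"));
  }
}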
Example 2: convertLineageDataset
import wherehows.common.DatasetPath; // import the package/class the method depends on
private static LineageRecord convertLineageDataset(LineageDatasetRecord lineageDataset, JobExecutionRecord jobExec)
    throws Exception {
  final LineageRecord record = new LineageRecord(jobExec.getAppId(), jobExec.getFlowExecutionId(), jobExec.getName(),
      jobExec.getExecutionId());
  record.setFlowPath(jobExec.getTopLevelFlowName());
  record.setJobExecUUID(jobExec.getExecutionGuid());
  record.setSourceTargetType(lineageDataset.getSourceTargetType());
  record.setOperation(lineageDataset.getOperation());
  record.setJobStartTime((int) (jobExec.getStartTime() / 1000));
  record.setJobEndTime((int) (jobExec.getEndTime() / 1000));
  // copy the partition range and type, if the dataset is partitioned
  if (lineageDataset.getPartition() != null) {
    record.setPartitionStart(lineageDataset.getPartition().getMinPartitionValue());
    record.setPartitionEnd(lineageDataset.getPartition().getMaxPartitionValue());
    record.setPartitionType(lineageDataset.getPartition().getPartitionType());
  }
  // prefer the explicit dataset URN; fall back to the URI from the dataset properties
  if (lineageDataset.getDatasetUrn() != null) {
    record.setFullObjectName(lineageDataset.getDatasetUrn());
  } else if (lineageDataset.getDatasetProperties() != null
      && lineageDataset.getDatasetProperties().getUri() != null) {
    record.setFullObjectName(lineageDataset.getDatasetProperties().getUri());
  }
  // take the first separated path as the abstract object name
  if (record.getFullObjectName() != null) {
    List<String> abstractPaths = DatasetPath.separatedDataset(record.getFullObjectName());
    if (abstractPaths.size() > 0) {
      record.setAbstractObjectName(abstractPaths.get(0));
    }
  }
  return record;
}
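Note how separatedDataset is used here: the full object name may itself be a comma-separated list of paths, and only the first separated path is kept as the abstract object name. If you need every path, iterate over the returned list instead of taking element 0.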
Example 3: seperatedDatasetTest2
import wherehows.common.DatasetPath; // import the package/class the method depends on
@Test
public void seperatedDatasetTest2() {
  String sample2 = "/.{/jobs/snusm/online/modeling/train-data/slot-1,/jobs/snusm/online/modeling/test-data/slot-1}";
  List<String> result2 = DatasetPath.separatedDataset(sample2);
  Assert.assertEquals(result2.get(0), sample2);
}
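This test confirms the brace handling from the opposite direction: the entire input is a single {...} group whose inner comma separates two full paths, so the string is returned unsplit, as one dataset path.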
Example 4: getLineageFromLog
import wherehows.common.DatasetPath; // import the package/class the method depends on
/**
 * Mine lineage from an Azkaban log.
 * @param log the Azkaban log content
 * @param azkabanJobExecRecord contains the job execution info used to construct the result
 * @param defaultDatabaseId database id to attach to each dataset found in the log
 * @return the lineage records extracted from the log
 */
public static List<LineageRecord> getLineageFromLog(String log, AzkabanJobExecRecord azkabanJobExecRecord, Integer defaultDatabaseId) {
  List<LineageRecord> result = new ArrayList<>();
  // captures the URI scheme (e.g. "hdfs", "teradata") at the start of a dataset path
  Pattern typePattern = Pattern.compile("^(\\w+):/.*");
  String datasetType = "";
  for (LogLineagePattern patternObject : logLineagePatterns) {
    Pattern pattern = Pattern.compile(patternObject.regex);
    Matcher matcher = pattern.matcher(log);
    while (matcher.find()) {
      String datasetString = matcher.group(patternObject.datasetIndex);
      System.out.println("MATCH STRING: " + datasetString + "\n");
      List<String> datasets = DatasetPath.separatedDataset(datasetString);
      for (String dataset : datasets) {
        if (patternObject.databaseNameIndex > 0) {
          // add the database name if it exists
          dataset = matcher.group(patternObject.databaseNameIndex) + "/" + dataset;
        }
        LineageRecord lineageRecord =
            new LineageRecord(azkabanJobExecRecord.getAppId(), azkabanJobExecRecord.getFlowExecId(),
                azkabanJobExecRecord.getJobName(), azkabanJobExecRecord.getJobExecId());
        // derive the dataset type from the URI scheme, defaulting to HDFS
        Matcher typeMatcher = typePattern.matcher(dataset);
        if (typeMatcher.matches()) {
          datasetType = typeMatcher.group(1);
        } else {
          datasetType = "hdfs";
        }
        lineageRecord.setDatasetInfo(defaultDatabaseId, dataset, datasetType);
        // record counts are optional; an index < 1 means the pattern does not capture them
        long recordCount =
            (patternObject.recordCountIndex < 1) ? 0 : Long.valueOf(matcher.group(patternObject.recordCountIndex));
        long insertCount =
            (patternObject.insertCountIndex < 1) ? 0 : Long.valueOf(matcher.group(patternObject.insertCountIndex));
        long deleteCount =
            (patternObject.deleteCountIndex < 1) ? 0 : Long.valueOf(matcher.group(patternObject.deleteCountIndex));
        long updateCount =
            (patternObject.updateCountIndex < 1) ? 0 : Long.valueOf(matcher.group(patternObject.updateCountIndex));
        lineageRecord
            .setOperationInfo(patternObject.sourceTargetType, patternObject.operation, recordCount, insertCount,
                deleteCount, updateCount, azkabanJobExecRecord.getStartTime(), azkabanJobExecRecord.getEndTime(),
                azkabanJobExecRecord.getFlowPath());
        result.add(lineageRecord);
      }
    }
  }
  return result;
}
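A quick way to see what the typePattern above does (a standalone illustration; the sample path is made up):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class TypePatternDemo {
  public static void main(String[] args) {
    Pattern typePattern = Pattern.compile("^(\\w+):/.*");
    Matcher m = typePattern.matcher("teradata:/dwh/fact_table");
    // prints "teradata"; a bare path like "/data/foo" would fall back to "hdfs"
    System.out.println(m.matches() ? m.group(1) : "hdfs");
  }
}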