当前位置: 首页>>代码示例>>Java>>正文


Java Marking类代码示例

本文整理汇总了Java中org.aksw.gerbil.transfer.nif.Marking的典型用法代码示例。如果您正苦于以下问题：Java Marking类的具体用法？Java Marking怎么用？Java Marking使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


Marking类属于org.aksw.gerbil.transfer.nif包，在下文中一共展示了Marking类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: findMarkings

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Converts tab-separated annotation lines into typed named entities.
 *
 * <p>Every line is split at tab characters. Column 1 is parsed as the start
 * offset and column 2 as the end offset; the entity length is their
 * difference. Column 3 is used as the entity URI, except that a value starting
 * with {@code "NIL"} is replaced by the empty string. The column at
 * {@code typeIndex} is mapped through {@code getTypeURI} and becomes the only
 * type of the entity.
 *
 * @param lines annotation lines, one annotation per line
 * @param text  text of the annotated document (not read by this method)
 * @return one {@link TypedNamedEntity} per line, in the iteration order of
 *         {@code lines}
 */
protected static List<Marking> findMarkings(Set<String> lines, String text) {
	List<Marking> result = new ArrayList<Marking>();
	for (String annotationLine : lines) {
		String[] columns = annotationLine.split("\t");

		int begin = Integer.parseInt(columns[1]);
		int length = Integer.parseInt(columns[2]) - begin;

		// Entities without a knowledge base entry are marked with a "NIL..." id.
		String entityUri = columns[3].startsWith("NIL") ? "" : columns[3];

		Set<String> typeUris = new HashSet<String>();
		typeUris.add(getTypeURI(columns[typeIndex]));

		result.add(new TypedNamedEntity(begin, length, entityUri, typeUris));
	}
	return result;
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:23,代码来源:Microposts2016Dataset.java

示例2: createDocument

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Builds a document from a text and the named entities parsed for it.
 *
 * <p>For every parsed entity the substring of {@code text} covered by the
 * entity is compared with the entity's own surface form; a mismatch is only
 * logged as a warning. The entity's URIs are passed to
 * {@code addDBpediaUris} before the entity is converted and collected.
 * After the document has been created, {@code mergeSubNamedEntity} is applied
 * to it.
 *
 * @param fileName     name handed to {@code generateDocumentUri} to derive the document URI
 * @param text         text of the document
 * @param parsedResult parsed annotations of the document
 * @return the created document
 */
protected Document createDocument(String fileName, String text, MSNBC_Result parsedResult) {
    final String documentUri = generateDocumentUri(fileName);
    final List<Marking> collected = new ArrayList<Marking>(parsedResult.getMarkings().size());
    for (MSNBC_NamedEntity entity : parsedResult.getMarkings()) {
        String textSurfaceForm = text.substring(entity.getStartPosition(),
                entity.getStartPosition() + entity.getLength());
        boolean surfaceFormMatches = textSurfaceForm.equals(entity.getSurfaceForm());
        if (!surfaceFormMatches) {
            // A mismatch is reported but does not stop the entity from being added.
            LOGGER.warn("In document " + documentUri + ", the expected surface form of the named entity " + entity
                    + " does not fit the surface form derived from the text \"" + textSurfaceForm + "\".");
        }
        addDBpediaUris(entity.getUris());
        collected.add(entity.toNamedEntity());
    }
    Document result = new DocumentImpl(text, documentUri, collected);
    mergeSubNamedEntity(result);
    return result;
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:18,代码来源:MSNBCDataset.java

示例3: loadDocuments

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Reads the text file line by line and creates one document per non-empty
 * line.
 *
 * <p>Each line is split at tab characters: column 0 is appended to the
 * document URI prefix ({@code "http://" + getName() + "/"}) and column 1 is
 * used as the document text. The markings of a document are obtained from
 * {@code findMarkings} using the split line and the annotation file.
 *
 * @param annFile  annotation file handed to {@code findMarkings}
 * @param textFile UTF-8 encoded file containing the documents
 * @return the created documents in file order
 * @throws GerbilException if an {@link IOException} occurs while reading
 */
private List<Document> loadDocuments(File annFile, File textFile) throws GerbilException {
	final List<Document> result = new ArrayList<Document>();
	final String uriPrefix = "http://" + getName() + "/";
	try (BufferedReader reader = new BufferedReader(new InputStreamReader(
			new FileInputStream(textFile), Charset.forName("UTF-8")))) {
		for (String row = reader.readLine(); row != null; row = reader.readLine()) {
			if (!row.isEmpty()) {
				String[] columns = row.split("\t");
				List<Marking> rowMarkings = findMarkings(columns, annFile);
				result.add(new DocumentImpl(columns[1], uriPrefix + columns[0], rowMarkings));
			}
		}
	} catch (IOException e) {
		throw new GerbilException("Exception while reading dataset.", e,
				ErrorTypes.DATASET_LOADING_ERROR);
	}
	return result;
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:25,代码来源:ERDDataset2.java

示例4: loadDocuments

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Loads one document per tweet from a file that contains one JSON object per
 * line.
 *
 * <p>For every non-blank line the {@code id_str} and {@code text} fields are
 * read. The annotation lines belonging to the id are fetched with
 * {@code getMarkingLines} and turned into markings by {@code findMarkings}.
 * The document URI is {@code "http://" + getName() + "/"} followed by the id.
 *
 * @param annotations file handed to {@code getMarkingLines} to look up the
 *                    annotation lines of a tweet
 * @param tweets      UTF-8 encoded file with one JSON object per line
 * @return the created documents in file order
 * @throws GerbilException if an {@link IOException} occurs while reading
 */
private List<Document> loadDocuments(File annotations, File tweets)
		throws GerbilException {
	List<Document> documents = new ArrayList<Document>();
	String documentUriPrefix = "http://" + getName() + "/";
	// The tweets file contains one JSON object per line.
	try (BufferedReader bReader = new BufferedReader(new InputStreamReader(
			new FileInputStream(tweets), Charset.forName("UTF-8")))) {
		String line;
		List<Marking> markings;
		while ((line = bReader.readLine()) != null) {
			// Blank lines (e.g. caused by a trailing newline at the end of
			// the file) are not valid JSON and would make the JSONObject
			// constructor fail with an unchecked exception.
			if (line.trim().isEmpty()) {
				continue;
			}
			JSONObject json = new JSONObject(line);

			String id = json.getString("id_str");
			String text = json.getString("text");
			markings = findMarkings(getMarkingLines(annotations, id), text);
			documents.add(new DocumentImpl(text, documentUriPrefix + id,
					markings));
		}
	} catch (IOException e) {
		throw new GerbilException("Exception while reading dataset.", e,
				ErrorTypes.DATASET_LOADING_ERROR);
	}

	return documents;
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:26,代码来源:WSDMDataset.java

示例5: findMarkings

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Extracts the typed named entities from a tweet given in a token-per-line
 * format.
 *
 * <p>The input is split into lines and every line into columns separated by
 * one or more tabs. Column 0 of each line, followed by a single space, is
 * appended to {@code realTweet}, which is reset at the beginning of the call.
 * A line with more than two columns whose column 2 starts with {@code "B-"}
 * creates an entity: {@code getWholeMarking} is asked for the complete
 * marking, whose element 0 determines the length, element 1 is used as URI
 * and element 2 as type.
 *
 * @param tweet tweet in token-per-line format
 * @return the entities found, in the order of their lines
 */
public static List<Marking> findMarkings(String tweet) {
	List<Marking> found = new ArrayList<Marking>();
	realTweet = new StringBuilder();
	String[] rows = tweet.split("\n");
	// Offset of the current token inside the text collected in realTweet.
	int offset = 0;
	for (int row = 0; row < rows.length; row++) {
		String[] columns = rows[row].split("\t+");
		realTweet.append(columns[0] + " ");
		// The trimmed value is not read again in this method; the access
		// fails for lines that have fewer than two columns.
		columns[1] = columns[1].trim();
		boolean startsEntity = columns.length > 2 && columns[2].startsWith("B-");
		if (startsEntity) {
			String[] marking = getWholeMarking(rows, row);
			Set<String> typeSet = new HashSet<String>();
			typeSet.add(marking[2]);
			found.add(new TypedNamedEntity(offset, marking[0].length(), marking[1], typeSet));
		}
		// +1 accounts for the space appended after every token.
		offset += columns[0].length() + 1;
	}
	return found;
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:25,代码来源:DerczynskiDataset.java

示例6: findMarkings

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Extracts the typed named entities from a tweet given in a token-per-line
 * format.
 *
 * <p>The input is split into lines and every line into columns separated by
 * one or more tabs. Column 0 of each line, followed by a single space, is
 * appended to {@code realTweet}, which is reset at the beginning of the call.
 * A line whose trimmed column 1 starts with {@code "B-"} creates an entity:
 * {@code getWholeMarking} is asked for the complete marking, whose element 0
 * determines the length and element 1 is used as type. The entity URI is
 * always the empty string.
 *
 * @param tweet tweet in token-per-line format
 * @return the entities found, in the order of their lines
 */
public static List<Marking> findMarkings(String tweet){
	List<Marking> found = new ArrayList<Marking>();
	realTweet = new StringBuilder();
	String[] rows = tweet.split("\n");
	// Offset of the current token inside the text collected in realTweet.
	int offset = 0;
	for (int row = 0; row < rows.length; row++) {
		String[] columns = rows[row].split("\t+");
		realTweet.append(columns[0] + " ");
		String tag = columns[1].trim();
		columns[1] = tag;
		if (tag.startsWith("B-")) {
			String[] marking = getWholeMarking(rows, row);
			Set<String> typeSet = new HashSet<String>();
			typeSet.add(marking[1]);
			found.add(new TypedNamedEntity(offset, marking[0].length(), "", typeSet));
		}
		// +1 accounts for the space appended after every token.
		offset += columns[0].length() + 1;
	}
	return found;
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:24,代码来源:RitterDataset.java

示例7: logResult

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Writes the given markings to the debug log as a single line of the form
 * {@code [annotatorName] result=[markingName<m1>,markingName<m2>,...]}, where
 * {@code <mi>} is the {@code toString()} value of the i-th marking.
 *
 * @param result        markings that should be logged
 * @param annotatorName name printed inside the leading brackets
 * @param markingName   prefix printed in front of every marking
 */
protected static void logResult(List<? extends Marking> result, String annotatorName, String markingName) {
    StringBuilder message = new StringBuilder();
    message.append('[').append(annotatorName).append("] result=[");
    // Empty before the first element, a comma before every following one.
    String separator = "";
    for (Marking marking : result) {
        message.append(separator).append(markingName).append(marking.toString());
        separator = ",";
    }
    message.append(']');
    LOGGER.debug(message.toString());
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:19,代码来源:ErrorCountingAnnotatorDecorator.java

示例8: storeAnnotatorOutput

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Stores the results of an annotator as a gzip-compressed Turtle NIF file if
 * {@code outputShouldBeStored} returns {@code true} for the given
 * configuration.
 *
 * <p>The target file comes from {@code generateOutputFile} and the documents
 * that are written come from {@code generateResultDocuments}. Errors are
 * logged and never propagated to the caller.
 *
 * <p>The streams are managed by try-with-resources so that a failure while
 * closing them (closing the GZIP stream writes the remaining compressed data)
 * is logged instead of being silently dropped, which could otherwise leave a
 * truncated file unnoticed.
 *
 * @param configuration configuration of the experiment task
 * @param results       annotator results, one list per document
 * @param documents     the documents that have been annotated
 * @param <T>           type of the markings contained in the results
 */
public <T extends Marking> void storeAnnotatorOutput(ExperimentTaskConfiguration configuration,
        List<List<T>> results, List<Document> documents) {
    if (outputShouldBeStored(configuration)) {
        try {
            File file = generateOutputFile(configuration);
            List<Document> resultDocuments = generateResultDocuments(results, documents);
            // Resources are closed in reverse order: first the GZIP stream,
            // then the underlying file stream.
            try (FileOutputStream fout = new FileOutputStream(file);
                    GZIPOutputStream gout = new GZIPOutputStream(fout)) {
                NIFWriter writer = new TurtleNIFWriter();
                writer.writeNIF(resultDocuments, gout);
            }
        } catch (Exception e) {
            LOGGER.error("Couldn't write annotator result to file.", e);
        }
    }
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:21,代码来源:AnnotatorOutputWriter.java

示例9: test

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Checks that {@code Microposts2015Dataset.findMarkings} returns a non-empty
 * list of {@link TypedNamedEntity} instances and that the i-th entity covers
 * the i-th expected mention of the tweet and carries the i-th expected type
 * as its first type.
 */
@Test
public void test() {
    Set<String> annotationLines = new HashSet<String>();
    for (String mentionLine : mentions) {
        annotationLines.add(mentionLine);
    }
    List<Marking> found = Microposts2015Dataset.findMarkings(annotationLines, tweet);
    Assert.assertNotNull(found);
    Assert.assertTrue(found.size() > 0);
    for (int idx = 0; idx < found.size(); idx++) {
        Marking current = found.get(idx);
        Assert.assertTrue(current instanceof TypedNamedEntity);
        TypedNamedEntity entity = (TypedNamedEntity) current;

        // The text covered by the entity has to be the expected mention.
        String coveredText = tweet.substring(entity.getStartPosition(),
                entity.getStartPosition() + entity.getLength());
        Assert.assertEquals(expectedMentions[idx], coveredText);

        String firstType = entity.getTypes().iterator().next();
        Assert.assertEquals(expectedTypes[idx], firstType);
    }
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:23,代码来源:Microposts2015DatasetMentionSearchTest.java

示例10: test

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Checks that {@code Microposts2016Dataset.findMarkings} returns a non-empty
 * list of {@link TypedNamedEntity} instances and that the i-th entity covers
 * the i-th expected mention of the tweet and carries the i-th expected type
 * as its first type.
 */
@Test
public void test() {
    Set<String> annotationLines = new HashSet<String>();
    for (String mentionLine : mentions) {
        annotationLines.add(mentionLine);
    }
    List<Marking> found = Microposts2016Dataset.findMarkings(annotationLines, tweet);
    Assert.assertNotNull(found);
    Assert.assertTrue(found.size() > 0);
    for (int idx = 0; idx < found.size(); idx++) {
        Marking current = found.get(idx);
        Assert.assertTrue(current instanceof TypedNamedEntity);
        TypedNamedEntity entity = (TypedNamedEntity) current;

        // The text covered by the entity has to be the expected mention.
        String coveredText = tweet.substring(entity.getStartPosition(),
                entity.getStartPosition() + entity.getLength());
        Assert.assertEquals(expectedMentions[idx], coveredText);

        String firstType = entity.getTypes().iterator().next();
        Assert.assertEquals(expectedTypes[idx], firstType);
    }
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:23,代码来源:Microposts2016DatasetMentionSearchTest.java

示例11: test

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Loads the MSNBC test dataset and checks that it contains exactly one
 * document with the expected URI, the expected text and exactly the expected
 * markings.
 *
 * <p>The dataset is closed in a {@code finally} block so that it is released
 * even if initialization or one of the assertions fails.
 *
 * @throws GerbilException if the dataset cannot be initialized
 */
@Test
public void test() throws GerbilException {
    MSNBCDataset dataset = new MSNBCDataset(TEST_TEXT_DIR, TEST_ANNOTATION_DIR);
    try {
        dataset.setName(DATASET_NAME);
        dataset.init();
        Assert.assertEquals(1, dataset.getInstances().size());
        Document document = dataset.getInstances().get(0);

        Assert.assertEquals(EXPECTED_DOCUMENT_URI, document.getDocumentURI());
        Assert.assertEquals(EXPECTED_TEXT, document.getText());

        Set<Marking> expectedNEs = new HashSet<Marking>(Arrays.asList(EXPECTED_MARKINGS));
        // Every marking found has to be expected ...
        for (Marking marking : document.getMarkings()) {
            Assert.assertTrue("Couldn't find " + marking.toString() + " inside " + expectedNEs.toString(),
                    expectedNEs.contains(marking));
        }
        // ... and the number of markings has to match the number of expected ones.
        Assert.assertEquals(expectedNEs.size(), document.getMarkings().size());
    } finally {
        IOUtils.closeQuietly(dataset);
    }
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:20,代码来源:MSNBCDatasetTest.java

示例12: test

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Checks that {@code WSDMDataset.findMarkings} returns a non-empty list of
 * {@link Annotation} instances and that the first URI of the i-th annotation
 * equals the i-th expected mention.
 */
@Test
public void test() {
    Set<String> annotationLines = new HashSet<String>();
    for (String mentionLine : mentions) {
        annotationLines.add(mentionLine);
    }
    List<Marking> found = WSDMDataset.findMarkings(annotationLines, tweet);
    Assert.assertNotNull(found);
    Assert.assertTrue(found.size() > 0);
    for (int idx = 0; idx < found.size(); idx++) {
        Marking current = found.get(idx);
        Assert.assertTrue(current instanceof Annotation);
        Annotation annotation = (Annotation) current;

        String firstUri = annotation.getUris().iterator().next();
        Assert.assertEquals(expectedMentions[idx], firstUri);
    }
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:20,代码来源:WSDM2012DatasetMentionSearchTest.java

示例13: test

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Loads the Senseval dataset from {@code file} and checks that the document
 * at {@code docIndex} has the expected text and that the text spans covered
 * by its markings equal the expected markings.
 *
 * <p>The dataset is closed in a {@code finally} block so that it is released
 * even if initialization or one of the assertions fails.
 *
 * @throws GerbilException if the dataset cannot be initialized
 * @throws IOException     if closing the dataset fails
 */
@Test
public void test() throws GerbilException, IOException {
	SensevalDataset data = new SensevalDataset(this.file);
	try {
		data.init();
		List<Document> documents = data.getInstances();
		Document doc = documents.get(docIndex);
		assertEquals(expectedSentence, doc.getText());
		List<Marking> markings = doc.getMarkings();
		String[] marks = new String[markings.size()];
		for (int i = 0; i < markings.size(); i++) {
			NamedEntity entity = ((NamedEntity) markings.get(i));
			// Collect the part of the text that is covered by the entity.
			marks[i] = doc.getText().substring(entity.getStartPosition(),
					entity.getStartPosition() + entity.getLength());
		}
		assertArrayEquals(expectedMarkings, marks);
	} finally {
		data.close();
	}
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:20,代码来源:SensevalDatasetTest.java

示例14: reduceToTextAndEntities

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Creates a copy of the given document that contains only its text, its URI
 * and plain {@link NamedEntity} markings.
 *
 * <p>Only the {@link TypedNamedEntity} markings of the document are
 * considered. Each of them that is accepted by a
 * {@link TypeBasedMarkingFilter} configured with the RDFS and OWL class URIs
 * is converted into a {@link NamedEntity} with the same position, length and
 * URIs, i.e. without its type information.
 * NOTE(review): the filter presumably rejects entities typed as
 * {@code rdfs:Class} or {@code owl:Class} - confirm against
 * {@code TypeBasedMarkingFilter}.
 *
 * @param document the document that should be reduced
 * @return a new document with the reduced markings
 */
public static Document reduceToTextAndEntities(Document document) {
    MarkingFilter<TypedNamedEntity> classFilter = new TypeBasedMarkingFilter<TypedNamedEntity>(false,
            RDFS.Class.getURI(), OWL.Class.getURI());
    List<TypedNamedEntity> typedEntities = document.getMarkings(TypedNamedEntity.class);
    List<Marking> reduced = new ArrayList<Marking>(typedEntities.size());
    for (TypedNamedEntity typed : typedEntities) {
        if (!classFilter.isMarkingGood(typed)) {
            continue;
        }
        reduced.add(new NamedEntity(typed.getStartPosition(), typed.getLength(), typed.getUris()));
    }
    return new DocumentImpl(document.getText(), document.getDocumentURI(), reduced);
}
 
开发者ID:dice-group,项目名称:Cetus,代码行数:13,代码来源:ExtendedCetusSurfaceFormExtractorTest.java

示例15: main

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Runs the Cetus type extraction on a single example sentence and prints the
 * resulting document to standard output.
 *
 * <p>The example document contains one named entity at position 51 with a
 * length of 22 characters, which covers "Jean-Baptiste Cotelier" in the
 * sentence.
 *
 * @param args command line arguments (not used)
 */
public static void main(String[] args) {
    CetusAnnotator annotator = new CetusAnnotator(
            CetusSurfaceFormExtractor.create(),
            YagoBasedTypeSearcher.create());
    String text = "Born on December, 1629 and died on 19 August 1686, Jean-Baptiste Cotelier or Cotelerius was a Patristic scholar and Catholic theologian.";
    Marking entity = new NamedEntity(51, 22,
            "http://dbpedia.org/resource/Jean-Baptiste_Cotelier");
    Document annotated = annotator.performTypeExtraction(
            new DocumentImpl(text, Arrays.asList(entity)));
    System.out.println(annotated);
}
 
开发者ID:dice-group,项目名称:Cetus,代码行数:12,代码来源:SimpleCetusAnnotatorTestClass.java


注:本文中的org.aksw.gerbil.transfer.nif.Marking类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。