本文整理汇总了Java中edu.jhu.hlt.concrete.TokenTagging类的典型用法代码示例。如果您正苦于以下问题:Java TokenTagging类的具体用法?Java TokenTagging怎么用?Java TokenTagging使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
TokenTagging类属于edu.jhu.hlt.concrete包,在下文中一共展示了TokenTagging类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: StanfordPreNERCommunication
import edu.jhu.hlt.concrete.TokenTagging; //导入依赖的package包/类
/**
 * Wraps the given {@link Communication} in a {@link CachedTokenizationCommunication} and indexes
 * the annotations found on its tokenizations: the token taggings whose tagging type is "POS",
 * "NER" or "lemma" (compared case-insensitively) and all dependency parses.
 *
 * @param c the communication to wrap
 * @throws MiscommunicationException declared by this constructor; NOTE(review): presumably raised
 *         while building the {@link CachedTokenizationCommunication} - confirm
 */
StanfordPreNERCommunication(final Communication c) throws MiscommunicationException {
    this.ctc = new CachedTokenizationCommunication(c);
    final List<Tokenization> tkzList = this.ctc.getTokenizations();

    // Flatten every tokenization's tagging list; tokenizations without one are skipped.
    final List<TokenTagging> ttList = tkzList.stream()
            .filter(Tokenization::isSetTokenTaggingList)
            .flatMap(tkz -> tkz.getTokenTaggingList().stream())
            .collect(Collectors.toList());

    // Constant-first comparison: a tagging without a tagging type is filtered out
    // rather than triggering a NullPointerException.
    this.posTTList = ttList.stream()
            .filter(tt -> "POS".equalsIgnoreCase(tt.getTaggingType()))
            .collect(Collectors.toList());
    this.nerTTList = ttList.stream()
            .filter(tt -> "NER".equalsIgnoreCase(tt.getTaggingType()))
            .collect(Collectors.toList());
    this.lemmaTTList = ttList.stream()
            .filter(tt -> "lemma".equalsIgnoreCase(tt.getTaggingType()))
            .collect(Collectors.toList());

    this.depParseList = new ArrayList<>();
    for (Tokenization tkz : tkzList) {
        if (tkz.isSetDependencyParseList()) {
            this.depParseList.addAll(tkz.getDependencyParseList());
        }
    }
}
示例2: addPos
import edu.jhu.hlt.concrete.TokenTagging; //导入依赖的package包/类
/**
 * Adds a {@link TokenTagging} of type "POS" to the tokenization matching each sentence, holding
 * one {@link TaggedToken} per token position with the sentence's POS tag at that position.
 * Does nothing when {@code sents.someHaveAt(AT.POS)} is false.
 *
 * @param sents the sentences supplying the POS tags
 * @param comm  the communication passed to {@code getTokenizationsCorrespondingTo}
 */
private void addPos(AnnoSentenceCollection sents, Communication comm) {
    if (sents.someHaveAt(AT.POS)) {
        List<Tokenization> tokenizations = getTokenizationsCorrespondingTo(sents, comm);

        // The same metadata object is passed to every TokenTagging constructed below.
        AnnotationMetadata metadata = new AnnotationMetadata();
        metadata.setTool(POS_TOOL);
        metadata.setTimestamp(timestamp);

        for (int sentIdx = 0; sentIdx < sents.size(); sentIdx++) {
            Tokenization target = tokenizations.get(sentIdx);
            AnnoSentence sentence = sents.get(sentIdx);

            List<TaggedToken> posTokens = new ArrayList<>();
            for (int tokIdx = 0; tokIdx < sentence.size(); tokIdx++) {
                TaggedToken posToken = new TaggedToken();
                posToken.setTokenIndex(tokIdx);
                posToken.setTag(sentence.getPosTag(tokIdx));
                posTokens.add(posToken);
            }

            TokenTagging posTagging = new TokenTagging(getUUID(), metadata, posTokens);
            posTagging.setTaggingType("POS");
            target.addToTokenTaggingList(posTagging);
        }
    }
}
示例3: addLemmata
import edu.jhu.hlt.concrete.TokenTagging; //导入依赖的package包/类
/**
 * Adds a {@link TokenTagging} of type "LEMMA" to the tokenization matching each sentence, holding
 * one {@link TaggedToken} per token position with the sentence's lemma at that position.
 * Does nothing when {@code sents.someHaveAt(AT.LEMMA)} is false.
 *
 * @param sents the sentences supplying the lemmata
 * @param comm  the communication passed to {@code getTokenizationsCorrespondingTo}
 */
private void addLemmata(AnnoSentenceCollection sents, Communication comm) {
    if (sents.someHaveAt(AT.LEMMA)) {
        List<Tokenization> tokenizations = getTokenizationsCorrespondingTo(sents, comm);

        // The same metadata object is passed to every TokenTagging constructed below.
        AnnotationMetadata metadata = new AnnotationMetadata();
        metadata.setTool(LEMMA_TOOL);
        metadata.setTimestamp(timestamp);

        for (int sentIdx = 0; sentIdx < sents.size(); sentIdx++) {
            Tokenization target = tokenizations.get(sentIdx);
            AnnoSentence sentence = sents.get(sentIdx);

            List<TaggedToken> lemmaTokens = new ArrayList<>();
            for (int tokIdx = 0; tokIdx < sentence.size(); tokIdx++) {
                TaggedToken lemmaToken = new TaggedToken();
                lemmaToken.setTokenIndex(tokIdx);
                lemmaToken.setTag(sentence.getLemma(tokIdx));
                lemmaTokens.add(lemmaToken);
            }

            TokenTagging lemmaTagging = new TokenTagging(getUUID(), metadata, lemmaTokens);
            lemmaTagging.setTaggingType("LEMMA");
            target.addToTokenTaggingList(lemmaTagging);
        }
    }
}
示例4: addTagging
import edu.jhu.hlt.concrete.TokenTagging; //导入依赖的package包/类
/**
 * Builds a {@link TokenTagging} from an array of tags and appends it to the tokenization.
 * The tag at array position {@code k} becomes a {@link TaggedToken} with token index {@code k}.
 *
 * @param tokenization the tokenization that receives the new tagging
 * @param tagType      the tagging type to set on the new tagging
 * @param toolName     the name passed to {@code getMetadata} to build the tagging's metadata
 * @param tags         the tags, in token order
 */
private static void addTagging(Tokenization tokenization, String tagType, String toolName, String[] tags) {
    List<TaggedToken> entries = new ArrayList<>();
    for (int idx = 0; idx < tags.length; idx++) {
        TaggedToken entry = new TaggedToken();
        entry.setTokenIndex(idx);
        entry.setTag(tags[idx]);
        entries.add(entry);
    }

    TokenTagging tagging = new TokenTagging();
    tagging.setUuid(getUUID());
    tagging.setMetadata(getMetadata(toolName));
    tagging.setTaggingType(tagType);
    tagging.setTaggedTokenList(entries);
    tokenization.addToTokenTaggingList(tagging);
}
示例5: generateConcreteTokenization
import edu.jhu.hlt.concrete.TokenTagging; //导入依赖的package包/类
/**
 * Generate a {@link Tokenization} from parallel arrays of tokens and per-token tags.
 *
 * Builds the tokenization by calling the three-argument overload
 * {@code generateConcreteTokenization(tokens, offsets, startPos)}, then attaches a single
 * {@link TokenTagging} whose tagging type is {@code "twitter"} and which holds one
 * {@link TaggedToken} for every non-null tag. If every tag is null, no tagging is attached.
 *
 * @param tokens
 *          - an array of tokens (Strings)
 * @param tokenTags
 *          - an array of tags parallel to {@code tokens}: {@code tokenTags[i]} is the tag of {@code tokens[i]}. Null
 *          entries are skipped. It is indexed for every position of {@code tokens}, so it must have at least
 *          {@code tokens.length} entries.
 * @param offsets
 *          - an array of integers (offsets), passed through to the three-argument overload
 * @param startPos
 *          - starting position of the text, passed through to the three-argument overload
 * @return a {@link Tokenization} object with the tokenization and, when at least one tag is non-null, the token tagging
 */
public static Tokenization generateConcreteTokenization(String[] tokens, String[] tokenTags, int[] offsets, int startPos) {
    Tokenization tokenization = generateConcreteTokenization(tokens, offsets, startPos);

    TokenTagging tt = new TokenTagging();
    tt.setUuid(UUIDFactory.newUUID());
    tt.setTaggingType("twitter");
    tt.setMetadata(new AnnotationMetadata(tiftMetadata));

    for (int i = 0; i < tokens.length; i++) {
        String tag = tokenTags[i];
        if (tag != null) {
            TaggedToken tok = new TaggedToken();
            tok.setTokenIndex(i).setTag(tag);
            tt.addToTaggedTokenList(tok);
        }
    }

    // Do not set the tags if everything was "null".
    if (tt.isSetTaggedTokenList()) {
        tokenization.addToTokenTaggingList(tt);
    }
    return tokenization;
}
示例6: getFirstXTagsWithName
import edu.jhu.hlt.concrete.TokenTagging; //导入依赖的package包/类
/**
 * Returns the first {@link TokenTagging} of the tokenization whose tagging type is set and equal
 * to {@code which.name()} and whose metadata tool name contains {@code toolName}.
 *
 * @param tokenization the tokenization to search
 * @param which        the wanted tag type
 * @param toolName     the substring the tagging's tool name must contain
 * @return the first matching tagging
 * @throws ConcreteException if the tokenization has no token tagging list, or no tagging matches
 */
private TokenTagging getFirstXTagsWithName(Tokenization tokenization, TagTypes which,
        String toolName) throws ConcreteException {
    if (tokenization.isSetTokenTaggingList()) {
        for (TokenTagging candidate : tokenization.getTokenTaggingList()) {
            if (!candidate.isSetTaggingType()) {
                continue;
            }
            if (!candidate.getTaggingType().equals(which.name())) {
                continue;
            }
            if (candidate.getMetadata().getTool().contains(toolName)) {
                return candidate;
            }
        }
        throw new ConcreteException("Did not find any tag theories with taggingType == " + which +" in tokenization " + tokenization.getUuid() + " with toolname containing " + toolName);
    }
    throw new ConcreteException("No TokenTaggings for tokenization: " + tokenization.getUuid());
}
示例7: StanfordToConcreteConversionOutput
import edu.jhu.hlt.concrete.TokenTagging; //导入依赖的package包/类
/**
 * Creates a conversion output from a token list and three token taggings. Each argument is
 * stored as given, without copying.
 *
 * @param tokenList the tokens
 * @param nerTT     the NER token tagging
 * @param posTT     the POS token tagging
 * @param lemmaTT   the lemma token tagging
 */
public StanfordToConcreteConversionOutput(final List<Token> tokenList,
        final TokenTagging nerTT, final TokenTagging posTT, final TokenTagging lemmaTT) {
    this.lemmaTT = lemmaTT;
    this.posTT = posTT;
    this.nerTT = nerTT;
    this.tokenList = tokenList;
}
开发者ID:hltcoe,项目名称:concrete-stanford-deprecated2,代码行数:11,代码来源:StanfordToConcreteConversionOutput.java
示例8: getTagging
import edu.jhu.hlt.concrete.TokenTagging; //导入依赖的package包/类
/**
 * Extracts the tag strings of a {@link TokenTagging}, in the order of its tagged token list.
 *
 * @param tagging the tagging to read; may be null
 * @return the tags in list order, or null when {@code tagging} is null
 */
private static List<String> getTagging(TokenTagging tagging) {
    if (tagging != null) {
        List<String> result = new ArrayList<>();
        List<TaggedToken> taggedTokens = tagging.getTaggedTokenList();
        for (int idx = 0; idx < taggedTokens.size(); idx++) {
            result.add(taggedTokens.get(idx).getTag());
        }
        return result;
    }
    return null;
}
示例9: getFirstXTagsWithName
import edu.jhu.hlt.concrete.TokenTagging; //导入依赖的package包/类
/**
 * Finds the first {@link TokenTagging} of the tokenization whose tagging type is set and equal to
 * {@code taggingType} and, when {@code toolName} is non-null, whose metadata tool name contains
 * {@code toolName}.
 *
 * @param tokenization the tokenization to search
 * @param taggingType  the wanted tagging type
 * @param toolName     the substring the tool name must contain, or null to accept any tool
 * @return the first matching tagging, or null when the tokenization has no token tagging list or
 *         nothing matches
 */
public static TokenTagging getFirstXTagsWithName(Tokenization tokenization, String taggingType, String toolName) {
    if (tokenization.isSetTokenTaggingList()) {
        for (TokenTagging candidate : tokenization.getTokenTaggingList()) {
            boolean typeMatches = candidate.isSetTaggingType()
                    && candidate.getTaggingType().equals(taggingType);
            if (!typeMatches) {
                continue;
            }
            if (toolName == null || candidate.getMetadata().getTool().contains(toolName)) {
                return candidate;
            }
        }
    }
    return null;
}
示例10: validateTokenTaggings
import edu.jhu.hlt.concrete.TokenTagging; //导入依赖的package包/类
/**
 * Validates the {@link TokenTagging} objects of this {@link Tokenization}, stopping at the first
 * invalid one.
 *
 * @return true if no {@link TokenTagging} list is set on the annotation, or if every TokenTagging
 *         in the list is valid; false otherwise
 */
private boolean validateTokenTaggings() {
    if (!this.annotation.isSetTokenTaggingList()) {
        return true;
    }
    Iterator<TokenTagging> taggings = this.annotation.getTokenTaggingListIterator();
    while (taggings.hasNext()) {
        TokenTagging current = taggings.next();
        // Each tagging is validated against the annotation that owns it.
        if (!new ValidatableTokenTagging(current, this.annotation).isValid()) {
            return false;
        }
    }
    return true;
}
示例11: convert
import edu.jhu.hlt.concrete.TokenTagging; //导入依赖的package包/类
/**
 * Converts a Concrete {@link TokenTagging} into a {@link TaggedTokenGroup}: copies the UUID, the
 * metadata's tool, k-best and timestamp, and the tagging type, then converts every tagged token
 * and stores it in the builder's map under its index value.
 *
 * @param tt the tagging to convert
 * @return the built group
 */
private static final TaggedTokenGroup convert(TokenTagging tt) {
    TaggedTokenGroup.Builder builder = new TaggedTokenGroup.Builder();
    builder.setUUID(convert(tt.getUuid()));

    AnnotationMetadata metadata = tt.getMetadata();
    builder.setTool(NonEmptyNonWhitespaceString.create(metadata.getTool()))
            .setKBest(IntGreaterThanZero.create(metadata.getKBest()))
            .setTimestamp(UnixTimestamp.create(metadata.getTimestamp()));

    builder.setNullableTaggingType(tt.getTaggingType());

    for (edu.jhu.hlt.concrete.TaggedToken source : tt.getTaggedTokenList()) {
        TaggedToken converted = convert(source);
        builder.putIndexToTaggedTokenMap(converted.getIndex().getVal(), converted);
    }
    return builder.build();
}
示例12: testEndURL
import edu.jhu.hlt.concrete.TokenTagging; //导入依赖的package包/类
/**
 * Tokenizes a tweet that ends in a truncated URL and checks that exactly one "twitter" tagging
 * is produced, that it contains two "URL" tags, and that the last of them points at the
 * truncated URL token.
 */
@Test
public void testEndURL() {
    final String tweet = "'La traición vendrá de un general de alto rango que generará un gran caos' - http://t.co/MgLypirfTV http://…";
    Tokenization tokenization = Tokenizer.TWITTER.tokenizeToConcrete(tweet);

    assertTrue(tokenization.isSetTokenTaggingList());
    List<TokenTagging> taggings = tokenization.getTokenTaggingList();
    assertEquals(1, taggings.size());

    TokenTagging onlyTagging = taggings.get(0);
    assertEquals("twitter", onlyTagging.getTaggingType());

    // Keep only the tokens tagged as URLs.
    List<TaggedToken> urlTags = onlyTagging.getTaggedTokenList().stream()
            .filter(tagged -> tagged.getTag().equals("URL"))
            .collect(Collectors.toList());
    logger.debug("Tags:");
    for (TaggedToken urlTag : urlTags) {
        logger.debug(urlTag.getTag());
    }
    assertEquals(2, urlTags.size());
    TaggedToken lastUrlTag = urlTags.get(urlTags.size() - 1);

    assertTrue(tokenization.isSetTokenList());
    List<Token> tokens = tokenization.getTokenList().getTokenList();
    logger.debug("tokens:");
    for (Token token : tokens) {
        logger.debug(token.getText());
    }

    String lastUrlText = tokens.get(lastUrlTag.getTokenIndex()).getText();
    assertEquals("Should get 'http://' as text for last token.", "http://…", lastUrlText);
    assertEquals("Type of last token should be 'URL'.", "URL", lastUrlTag.getTag());
}
示例13: getFirstXTags
import edu.jhu.hlt.concrete.TokenTagging; //导入依赖的package包/类
/**
 * Returns the first {@link TokenTagging} of the tokenization whose tagging type is set and equal
 * to {@code which.name()}.
 *
 * @param tokenization the tokenization to search
 * @param which        the wanted tag type
 * @return the first matching tagging
 * @throws ConcreteException if the tokenization has no token tagging list, or no tagging matches
 */
private TokenTagging getFirstXTags(Tokenization tokenization, TagTypes which) throws ConcreteException {
    if (tokenization.isSetTokenTaggingList()) {
        for (TokenTagging candidate : tokenization.getTokenTaggingList()) {
            if (!candidate.isSetTaggingType()) {
                continue;
            }
            if (candidate.getTaggingType().equals(which.name())) {
                return candidate;
            }
        }
        throw new ConcreteException("Did not find any tag theories with taggingType == " + which +" in tokenization " + tokenization.getUuid());
    }
    throw new ConcreteException("No TokenTaggings for tokenization: " + tokenization.getUuid());
}
示例14: getNerTT
import edu.jhu.hlt.concrete.TokenTagging; //导入依赖的package包/类
/**
 * Accessor for the NER token tagging held by this object.
 *
 * @return the value of the {@code nerTT} field
 */
public TokenTagging getNerTT() {
    return this.nerTT;
}
示例15: getPosTT
import edu.jhu.hlt.concrete.TokenTagging; //导入依赖的package包/类
/**
 * Accessor for the POS token tagging held by this object.
 *
 * @return the value of the {@code posTT} field
 */
public TokenTagging getPosTT() {
    return this.posTT;
}