本文整理汇总了Java中edu.jhu.hlt.concrete.TextSpan类的典型用法代码示例。如果您正苦于以下问题:Java TextSpan类的具体用法?Java TextSpan怎么用?Java TextSpan使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
TextSpan类属于edu.jhu.hlt.concrete包,在下文中一共展示了TextSpan类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: tokenizationToCoreLabelList
import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Builds one {@link CoreLabel} for every {@link Token} in the given {@link Tokenization}.
 *
 * <p>Each label's begin position is the token's span start minus {@code offset}, its length is
 * the width of the token's {@link TextSpan}, and its index is the token index plus one.
 *
 * @param tkz the tokenization whose token list is converted
 * @param sentIdx the sentence index recorded on every produced label
 * @param offset the amount subtracted from each token's start offset
 * @return the produced labels, in the order of the token list
 */
private static List<CoreLabel> tokenizationToCoreLabelList(final Tokenization tkz, int sentIdx, int offset) {
  final List<CoreLabel> labels = new ArrayList<CoreLabel>();
  for (final Token token : tkz.getTokenList().getTokenList()) {
    final TextSpan span = token.getTextSpan();
    final int relativeBegin = span.getStart() - offset;
    final int width = span.getEnding() - span.getStart();

    final CoreLabel label = factory.makeToken(token.getText(), relativeBegin, width);
    // The label index is one greater than the Concrete token index.
    label.setIndex(token.getTokenIndex() + 1);
    label.setSentIndex(sentIdx);
    labels.add(label);
  }
  return labels;
}
示例2: unsectionedCommunicationFromText
import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Wraps the given text in a {@link Communication} holding a single "Passage" {@link Section}
 * whose {@link TextSpan} covers the whole text.
 *
 * <p>NOTE: Do not make a Sentence, or else the annotator will not run.
 *
 * @param text the full text of the communication
 * @return a communication with one section and no sentences
 */
public static Communication unsectionedCommunicationFromText(String text) {
  // One span over the entire text: [0, text.length()).
  TextSpan wholeText = new TextSpan();
  wholeText.setStart(0);
  wholeText.setEnding(text.length());

  Section passage = new Section();
  passage.setKind("Passage");
  passage.setUuid(UUIDFactory.newUUID());
  passage.setTextSpan(wholeText);

  AnnotationMetadata metadata = new AnnotationMetadata();
  metadata.setTool("BasicDepParseTester");
  metadata.setTimestamp(1);

  Communication comm = new Communication();
  comm.setUuid(UUIDFactory.newUUID());
  comm.setId("Dummy_Communication");
  comm.setMetadata(metadata);
  comm.addToSectionList(passage);
  comm.setText(text);
  return comm;
}
示例3: testCreate
import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Checks that {@link MiscCommunication#create} exposes the id and the author's Twitter id of a
 * communication made of one section holding one sentence.
 */
@Test
public void testCreate() throws Exception {
  // Tweet metadata carrying the author id asserted below.
  TwitterUser author = new TwitterUser();
  author.setId(410249214L);
  TweetInfo tweet = new TweetInfo();
  tweet.setUser(author);
  CommunicationMetadata metadata = new CommunicationMetadata();
  metadata.setTweetInfo(tweet);

  Communication comm = new Communication();
  comm.setId("dumb");
  comm.setUuid(this.g.next());
  comm.setText(txt);

  // The sentence and the section share one span over the whole text.
  TextSpan wholeText = new TextSpan(0, txt.length());
  Sentence sentence = new Sentence();
  sentence.setUuid(UUIDFactory.newUUID());
  sentence.setTextSpan(wholeText);

  Section section = new SectionFactory(g).fromTextSpan(wholeText, "passage");
  section.addToSentenceList(sentence);
  comm.addToSectionList(section);
  comm.setCommunicationMetadata(metadata);

  MiscCommunication misc = MiscCommunication.create(comm);
  assertEquals("dumb", misc.getId().getContent());
  assertEquals(410249214L, misc.getAuthorTwitterID().get().longValue());
}
示例4: generateSentencesFromText
import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Produces one {@link Sentence} for every match of this class's {@link Pattern} in the given
 * text. The pattern is:
 *
 * <pre>
 * [a-zA-Z0-9 ']+[.?!]+
 * </pre>
 *
 * Each sentence carries only a {@link TextSpan}; its offsets are the match boundaries within
 * {@code s} itself.
 *
 * @param s - The {@link String} to split into {@link Sentence}s
 * @return a {@link List} of {@link Sentence} objects, in match order (empty when nothing matches)
 */
public List<Sentence> generateSentencesFromText(String s) {
  final List<Sentence> sentences = new ArrayList<Sentence>();
  for (final Matcher matcher = this.splitPattern.matcher(s); matcher.find();) {
    final Sentence sentence = new Sentence();
    sentence.setTextSpan(new TextSpan(matcher.start(), matcher.end()));
    sentences.add(sentence);
  }
  return sentences;
}
示例5: annotate
import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Splits every {@link Section} of a copy of the given {@link Communication} into
 * {@link Sentence}s and returns the copy as a {@link SentencedCommunication}.
 *
 * <p>Sentences are generated from each section's own substring of the communication text, so
 * the spans that come back are relative to the section. They are shifted by the section's start
 * offset before being stored, so that every sentence {@link TextSpan} indexes into the
 * communication text, the same way the section spans used here do.
 *
 * @param comm the communication to annotate; it is copied, not modified
 * @return the copied communication with a sentence list set on each section
 * @throws AnalyticException if the communication has no sections, or if the sentenced wrapper
 *         cannot be built from the result
 */
@Override
public SentencedCommunication annotate(Communication comm) throws AnalyticException {
  final Communication cpy = new Communication(comm);
  final AnalyticUUIDGenerator g = new AnalyticUUIDGeneratorFactory(comm).create();

  final List<Section> sectionList = cpy.getSectionList();
  if (sectionList == null || sectionList.isEmpty()) {
    throw new AnalyticException("Communication does not have at least one Section; "
        + "cannot generate Sentences from it.");
  }

  final String commText = cpy.getText();
  for (Section s : sectionList) {
    final TextSpan sectionSpan = s.getTextSpan();
    final int sectionStart = sectionSpan.getStart();
    final String sectionText = commText.substring(sectionStart, sectionSpan.getEnding());

    final List<Sentence> sentList = this.generateSentencesFromText(sectionText);
    for (Sentence st : sentList) {
      // Generated offsets are relative to sectionText; re-base them onto the communication text.
      final TextSpan relative = st.getTextSpan();
      if (relative != null) {
        st.setTextSpan(new TextSpan(relative.getStart() + sectionStart,
            relative.getEnding() + sectionStart));
      }
      st.setUuid(g.next());
    }
    s.setSentenceList(sentList);
  }

  try {
    return new CachedSentencedCommunication(cpy);
  } catch (MiscommunicationException e) {
    throw new AnalyticException(e);
  }
}
示例6: annotate
import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Tokenizes every {@link Sentence} of a copy of the given {@link Communication} and returns the
 * copy as a {@link TokenizedCommunication}.
 *
 * <p>The text of each sentence is cut out of the communication text using the sentence's
 * {@link TextSpan}, then passed to this analytic's tokenizer.
 *
 * @param comm the communication to annotate; it is copied, not modified
 * @return the copied communication with a tokenization set on each sentence
 * @throws AnalyticException if a wrapper cannot be built around the communication
 */
@Override
public TokenizedCommunication annotate(Communication comm) throws AnalyticException {
  final Communication copy = new Communication(comm);
  try {
    final CachedSentencedCommunication sentenced = new CachedSentencedCommunication(copy);
    // backing map is a LinkedHashMap - ordering should be OK
    final List<Sentence> snapshot = new ArrayList<>(sentenced.getSentences());
    for (final Sentence sentence : snapshot) {
      final TextSpan span = sentence.getTextSpan();
      final String sentenceText = copy.getText().substring(span.getStart(), span.getEnding());
      final Tokenization tokenization = this.tokenizer.tokenizeToConcrete(sentenceText);
      // override metadata (should be patched later)
      tokenization.setMetadata(TooledMetadataConverter.convert(this));
      sentence.setTokenization(tokenization);
    }
    return new CachedTokenizationCommunication(copy);
  } catch (MiscommunicationException e) {
    throw new AnalyticException(e);
  }
}
示例7: generateConcreteTokenization
import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Builds a {@code TOKEN_LIST} {@link Tokenization} from parallel arrays of token strings and
 * token offsets.
 *
 * <p>Token {@code i} gets token index {@code i} and a {@link TextSpan} starting at
 * {@code startPos + offsets[i]} and ending {@code tokens[i].length()} characters later.
 *
 * @param tokens
 *          - the token strings
 * @param offsets
 *          - one offset per token, added to {@code startPos} to obtain the token's span start
 * @param startPos
 *          - starting position of the text
 * @return a {@link Tokenization} object holding one {@link Token} per entry of {@code tokens}
 */
public static Tokenization generateConcreteTokenization(String[] tokens, int[] offsets, int startPos) {
  Tokenization tokenization = new Tokenization();
  tokenization.setKind(TokenizationKind.TOKEN_LIST);
  tokenization.setMetadata(new AnnotationMetadata(tiftMetadata));
  tokenization.setUuid(UUIDFactory.newUUID());

  TokenList tokenList = new TokenList();
  // Note: the position in the array doubles as the token index.
  for (int i = 0; i < tokens.length; ++i) {
    final String text = tokens[i];
    final int spanStart = startPos + offsets[i];
    final int spanEnd = spanStart + text.length();

    Token token = new Token();
    token.setTextSpan(new TextSpan(spanStart, spanEnd));
    token.setText(text);
    token.setTokenIndex(i);
    tokenList.addToTokenList(token);
  }

  tokenization.setTokenList(tokenList);
  return tokenization;
}
示例8: mockCommunication
import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Creates a {@link Communication} with the given id and text, made of one "passage" section
 * holding one whitespace-tokenized sentence. Section and sentence both span the whole text.
 *
 * @param id the communication id
 * @param text the communication text
 * @return the assembled communication, with empty {@link CommunicationMetadata} attached
 * @throws ConcreteException declared by this helper; propagated from the calls it makes
 */
private Communication mockCommunication(String id, String text) throws ConcreteException {
  Communication comm = new Communication();
  comm.setId(id);
  comm.setUuid(this.g.next());
  comm.setText(text);

  // One span over the entire text, shared by the sentence and the section.
  TextSpan wholeText = new TextSpan(0, text.length());

  Sentence sentence = new Sentence();
  sentence.setTextSpan(wholeText);
  sentence.setUuid(g.next());
  sentence.setTokenization(Tokenizer.WHITESPACE.tokenizeToConcrete(text));

  Section section = new SectionFactory(g).fromTextSpan(wholeText, "passage");
  section.addToSentenceList(sentence);
  comm.addToSectionList(section);

  comm.setCommunicationMetadata(new CommunicationMetadata());
  return comm;
}
示例9: mockCommunication
import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Creates a {@link Communication} with the given id and text, made of one "passage" section
 * holding one sentence, plus tweet metadata naming the given author.
 *
 * @param id the communication id
 * @param text the communication text; section and sentence both span all of it
 * @param authorId the id set on the {@link TwitterUser} stored in the tweet metadata
 * @return the assembled communication
 * @throws ConcreteException declared by this helper; propagated from the calls it makes
 */
private Communication mockCommunication(String id, String text, long authorId) throws ConcreteException {
  // Tweet metadata carrying the author id.
  TwitterUser author = new TwitterUser();
  author.setId(authorId);
  TweetInfo tweet = new TweetInfo();
  tweet.setUser(author);
  CommunicationMetadata metadata = new CommunicationMetadata();
  metadata.setTweetInfo(tweet);

  Communication comm = new Communication();
  comm.setId(id);
  comm.setUuid(this.g.next());
  comm.setText(text);

  // One span over the entire text, shared by the sentence and the section.
  TextSpan wholeText = new TextSpan(0, text.length());

  Sentence sentence = new Sentence();
  sentence.setTextSpan(wholeText);
  sentence.setUuid(g.next());

  Section section = new SectionFactory(g).fromTextSpan(wholeText, "passage");
  section.addToSentenceList(sentence);
  comm.addToSectionList(section);

  comm.setCommunicationMetadata(metadata);
  return comm;
}
示例10: concreteSectionToCoreMapList
import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Converts every {@link Sentence} of the given {@link Section} into a {@link CoreMap}.
 *
 * <p>Each map records the sentence's position in the section, its character offsets, its text
 * (cut out of {@code commText} with those offsets), its tokens, and a token begin/end pair. The
 * token positions are running counts across the section: a sentence's token begin is the
 * previous sentence's token end, starting from zero.
 *
 * @param sect the section whose sentences are converted
 * @param commText the text that the sentence character offsets index into
 * @return one map per sentence, in sentence-list order
 */
public static List<CoreMap> concreteSectionToCoreMapList(final Section sect, final String commText) {
  final List<CoreMap> coreMaps = new ArrayList<>();
  int sentenceIdx = 0;
  int runningTokenCount = 0;
  for (final Sentence sentence : sect.getSentenceList()) {
    final CoreMap coreMap = new ArrayCoreMap();
    coreMap.set(SentenceIndexAnnotation.class, sentenceIdx);

    // Character offsets and text of the sentence.
    final TextSpan span = sentence.getTextSpan();
    final int charBegin = span.getStart();
    final int charEnd = span.getEnding();
    LOGGER.debug("Setting stanford sentence BeginChar = {}", charBegin);
    coreMap.set(CharacterOffsetBeginAnnotation.class, charBegin);
    LOGGER.debug("Setting stanford sentence EndChar = {}", charEnd);
    coreMap.set(CharacterOffsetEndAnnotation.class, charEnd);
    final String sentenceText = commText.substring(charBegin, charEnd);
    LOGGER.debug("Setting text: {}", sentenceText);
    coreMap.set(TextAnnotation.class, sentenceText);

    // Token labels; their begin offsets are made relative to the sentence start.
    final List<CoreLabel> labels =
        tokenizationToCoreLabelList(sentence.getTokenization(), sentenceIdx, charBegin);
    LOGGER.debug("Setting stanford sentence token begin: {}", runningTokenCount);
    coreMap.set(TokenBeginAnnotation.class, runningTokenCount);
    final int tokenEnd = runningTokenCount + labels.size();
    LOGGER.debug("Setting stanford sentence token end: {}", tokenEnd);
    coreMap.set(TokenEndAnnotation.class, tokenEnd);
    coreMap.set(TokensAnnotation.class, labels);

    runningTokenCount = tokenEnd;
    coreMaps.add(coreMap);
    sentenceIdx++;
  }
  return coreMaps;
}
示例11: toConcreteToken
import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Converts this object into a Concrete {@link Token}.
 *
 * <p>The token's {@link TextSpan} is built by {@code TextSpanFactory.withOffset} from this
 * object's start and end sentence offsets and {@code cOffset}. The token index is this object's
 * index minus one.
 *
 * @param cOffset the offset handed to {@code TextSpanFactory.withOffset}
 * @return the new token, carrying its text span and this object's text
 * @throws AnalyticException if the computed token index is negative
 */
public Token toConcreteToken(int cOffset) throws AnalyticException {
  final TextSpan span =
      TextSpanFactory.withOffset(this.startSentenceOffset, this.endSentenceOffset, cOffset);
  LOGGER.debug("Creating concrete token text span: {}", span);

  final int originalIndex = this.getIndex();
  final int tokenIndex = originalIndex - 1;
  if (tokenIndex < 0) {
    throw new AnalyticException("The concrete token index was somehow less than 0. Original index: " + originalIndex);
  }

  final Token token = new Token(tokenIndex);
  token.setTextSpan(span);
  // NOTE(review): the text may be null here - confirm against callers.
  final String tokenText = this.text;
  LOGGER.debug("Setting Concrete token text: {}", tokenText);
  token.setText(tokenText);
  return token;
}
示例12: validateTextSpan
import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Validates an optional {@link TextSpan}.
 *
 * @param ts the span to check, possibly absent
 * @return {@code true} when no span is present; otherwise the result of
 *         {@code ValidatableTextSpan.isValid()} for the contained span
 */
private static boolean validateTextSpan(Optional<TextSpan> ts) {
  // An absent span counts as valid; only a present span is actually checked.
  return !ts.isPresent() || new ValidatableTextSpan(ts.get()).isValid();
}
示例13: testCreate
import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Checks that a {@link MiscSentence} created from a sentence spanning the whole text reports
 * that text through its text span.
 */
@Test
public void testCreate() {
  final TextSpan wholeText = new TextSpan(0, txt.length());
  final Sentence sentence = new Sentence();
  sentence.setUuid(UUIDFactory.newUUID());
  sentence.setTextSpan(wholeText);

  final MiscSentence misc = MiscSentence.create(sentence, "foo", txt);
  assertEquals("hello world!", misc.getTextSpan().get().getText().getContent());
}
示例14: getSentencedCommunication
import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Takes the communication from {@code getNoSentenceCommunication()} and adds one
 * {@link Sentence} to its first section. The sentence's {@link TextSpan} is a copy of that
 * section's span.
 *
 * @return the communication with the sentence added
 * @throws ConcreteException declared by this helper; propagated from the calls it makes
 */
private Communication getSentencedCommunication() throws ConcreteException {
  final Communication comm = this.getNoSentenceCommunication();
  final Sentence sentence = new SentenceFactory(comm).create();

  // Only the first section in the list receives the sentence.
  final Section firstSection = comm.getSectionListIterator().next();
  sentence.setTextSpan(new TextSpan(firstSection.getTextSpan()));
  firstSection.addToSentenceList(sentence);
  return comm;
}
示例15: convert
import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Converts a possibly-null {@link TextSpan} into an optional {@link FlatTextSpan}.
 *
 * @param ts the span to convert; may be {@code null}
 * @return an empty optional for {@code null} input; otherwise a {@link FlatTextSpan} whose
 *         start and end are copied from the span's start and ending
 */
private static final Optional<FlatTextSpan> convert(TextSpan ts) {
  if (ts == null) {
    return Optional.empty();
  }

  final FlatTextSpan flat = new FlatTextSpan.Builder()
      .setStart(ts.getStart())
      .setEnd(ts.getEnding())
      .build();
  return Optional.of(flat);
}