当前位置: 首页>>代码示例>>Java>>正文


Java TextSpan类代码示例

本文整理汇总了Java中edu.jhu.hlt.concrete.TextSpan的典型用法代码示例。如果您正苦于以下问题:Java TextSpan类的具体用法?Java TextSpan怎么用?Java TextSpan使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


TextSpan类属于edu.jhu.hlt.concrete包,在下文中一共展示了TextSpan类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: tokenizationToCoreLabelList

import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Converts every {@link Token} of a Concrete {@link Tokenization} into a {@link CoreLabel}.
 *
 * @param tkz the tokenization whose token list is converted
 * @param sentIdx the sentence index stamped on every produced label
 * @param offset the amount subtracted from each token's start character offset
 * @return one label per token, in token-list order
 */
private static List<CoreLabel> tokenizationToCoreLabelList(final Tokenization tkz, int sentIdx, int offset) {
  final List<Token> concreteTokens = tkz.getTokenList().getTokenList();
  final List<CoreLabel> labels = new ArrayList<>();

  for (final Token concreteToken : concreteTokens) {
    final TextSpan span = concreteToken.getTextSpan();
    final int spanStart = span.getStart();

    // The factory is handed a shifted begin position and the span's length, not its end position.
    final CoreLabel label =
        factory.makeToken(concreteToken.getText(), spanStart - offset, span.getEnding() - spanStart);
    // The label index is the Concrete token index shifted up by one.
    label.setIndex(concreteToken.getTokenIndex() + 1);
    label.setSentIndex(sentIdx);
    labels.add(label);
  }

  return labels;
}
 
开发者ID:hltcoe,项目名称:concrete-stanford-deprecated2,代码行数:23,代码来源:ConcreteToStanfordMapper.java

示例2: unsectionedCommunicationFromText

import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Builds a {@link Communication} holding {@code text} and a single "Passage" {@link Section}
 * whose span runs from 0 to {@code text.length()}.
 *
 * NOTE: Do not make a Sentence, or else the annotator will not run
 *
 * @param text the text to store in the communication
 * @return the newly built communication
 */
public static Communication unsectionedCommunicationFromText(String text) {
  final TextSpan wholeText = new TextSpan();
  wholeText.setStart(0);
  wholeText.setEnding(text.length());

  final Section passage = new Section();
  passage.setKind("Passage");
  passage.setUuid(UUIDFactory.newUUID());
  passage.setTextSpan(wholeText);

  final Communication comm = new Communication();
  comm.setUuid(UUIDFactory.newUUID());
  comm.setId("Dummy_Communication");

  final AnnotationMetadata metadata = new AnnotationMetadata();
  metadata.setTool("BasicDepParseTester");
  metadata.setTimestamp(1);
  comm.setMetadata(metadata);

  comm.setText(text);
  comm.addToSectionList(passage);
  return comm;
}
 
开发者ID:hltcoe,项目名称:concrete-stanford-deprecated2,代码行数:22,代码来源:BasicDepParseTest.java

示例3: testCreate

import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Checks that {@code MiscCommunication.create} exposes the ID and the author's Twitter ID of a
 * communication that has one section containing one sentence.
 */
@Test
public void testCreate() throws Exception {
  // Author metadata: a tweet whose user carries the ID asserted at the end.
  final TwitterUser author = new TwitterUser();
  author.setId(410249214L);
  final TweetInfo tweet = new TweetInfo();
  tweet.setUser(author);
  final CommunicationMetadata metadata = new CommunicationMetadata();
  metadata.setTweetInfo(tweet);

  final Communication comm = new Communication();
  comm.setId("dumb");
  comm.setUuid(this.g.next());
  comm.setText(txt);
  comm.setCommunicationMetadata(metadata);

  // A single span over all of txt is shared by the sentence and its section.
  final TextSpan fullSpan = new TextSpan(0, txt.length());
  final Sentence sentence = new Sentence();
  sentence.setUuid(UUIDFactory.newUUID());
  sentence.setTextSpan(fullSpan);

  final Section passage = new SectionFactory(g).fromTextSpan(fullSpan, "passage");
  passage.addToSentenceList(sentence);
  comm.addToSectionList(passage);

  final MiscCommunication wrapped = MiscCommunication.create(comm);
  assertEquals("dumb", wrapped.getId().getContent());
  assertEquals(410249214L, wrapped.getAuthorTwitterID().get().longValue());
}
 
开发者ID:hltcoe,项目名称:concrete-java,代码行数:27,代码来源:MiscCommunicationTest.java

示例4: generateSentencesFromText

import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Given some text, generate a {@link List} of {@link Sentence} objects given the {@link Pattern}
 * for this class, which is:
 *
 * <pre>
 * [a-zA-Z0-9 ']+[.?!]+
 * </pre>
 *
 * Each returned sentence carries only a {@link TextSpan}; its offsets are the match positions
 * within {@code s} itself. No UUID is assigned here.
 *
 * @param s - The {@link String} from which to generate {@link Sentence}s
 * @return a {@link List} of {@link Sentence} objects, one per pattern match, in match order
 */
public List<Sentence> generateSentencesFromText(String s) {
  final List<Sentence> sentences = new ArrayList<>();
  for (final Matcher hit = this.splitPattern.matcher(s); hit.find();) {
    final Sentence sentence = new Sentence();
    sentence.setTextSpan(new TextSpan(hit.start(), hit.end()));
    sentences.add(sentence);
  }
  return sentences;
}
 
开发者ID:hltcoe,项目名称:concrete-java,代码行数:27,代码来源:SillySentenceSegmenter.java

示例5: annotate

import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Splits every {@link Section} of a copy of {@code comm} into sentences and returns the copy
 * wrapped as a {@link SentencedCommunication}. The input communication is not modified.
 *
 * Sentence {@link TextSpan}s are expressed as offsets into the communication text, so they stay
 * correct for sections that do not begin at offset 0.
 *
 * @param comm the communication to segment; must contain at least one section
 * @return the sentence-segmented copy
 * @throws AnalyticException if there are no sections, or if the segmented copy cannot be wrapped
 */
@Override
public SentencedCommunication annotate(Communication comm) throws AnalyticException {
  final Communication cpy = new Communication(comm);
  AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory(comm);
  AnalyticUUIDGenerator g = f.create();
  List<Section> sectionList = cpy.getSectionList();
  if (sectionList == null || sectionList.isEmpty()) {
    throw new AnalyticException("Communication does not have at least one Section; "
        + "cannot generate Sentences from it.");
  }

  final String commText = cpy.getText();
  for (Section s : sectionList) {
    TextSpan ts = s.getTextSpan();
    final int sectionStart = ts.getStart();
    String sectionText = commText.substring(sectionStart, ts.getEnding());
    List<Sentence> sentList = this.generateSentencesFromText(sectionText);
    for (Sentence st : sentList) {
      // The segmenter only sees sectionText, so its offsets are relative to the section.
      // Shift them by the section start to make them offsets into the communication text.
      TextSpan local = st.getTextSpan();
      st.setTextSpan(new TextSpan(sectionStart + local.getStart(), sectionStart + local.getEnding()));
      st.setUuid(g.next());
    }
    s.setSentenceList(sentList);
  }

  try {
    return new CachedSentencedCommunication(cpy);
  } catch (MiscommunicationException e) {
    throw new AnalyticException(e);
  }
}
 
开发者ID:hltcoe,项目名称:concrete-java,代码行数:26,代码来源:SillySentenceSegmenter.java

示例6: annotate

import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Tokenizes every sentence of a copy of {@code comm} with this object's tokenizer and returns
 * the copy wrapped as a {@link TokenizedCommunication}.
 *
 * @param comm the communication to tokenize; it is copied, not modified
 * @return the tokenized copy
 * @throws AnalyticException if wrapping the copy raises a {@link MiscommunicationException}
 */
@Override
public TokenizedCommunication annotate(Communication comm) throws AnalyticException {
  final Communication copy = new Communication(comm);
  try {
    // backing map is a LinkedHashMap - ordering should be OK
    final List<Sentence> snapshot =
        new ArrayList<>(new CachedSentencedCommunication(copy).getSentences());
    for (final Sentence sentence : snapshot) {
      final TextSpan span = sentence.getTextSpan();
      final String sentenceText = copy.getText().substring(span.getStart(), span.getEnding());
      // NOTE(review): the tokenizer only receives the sentence text, so it cannot know the
      // sentence's start offset - confirm whether token spans should be shifted by span.getStart().
      final Tokenization tokenization = this.tokenizer.tokenizeToConcrete(sentenceText);
      // override metadata (should be patched later)
      tokenization.setMetadata(TooledMetadataConverter.convert(this));
      sentence.setTokenization(tokenization);
    }

    return new CachedTokenizationCommunication(copy);
  } catch (MiscommunicationException e) {
    throw new AnalyticException(e);
  }
}
 
开发者ID:hltcoe,项目名称:concrete-java,代码行数:23,代码来源:TiftTokenizer.java

示例7: generateConcreteTokenization

import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Generate a {@link Tokenization} object of kind {@code TOKEN_LIST} from an array of tokens, a
 * parallel array of offsets, and the start position of the text.
 *
 * Token {@code i} gets token index {@code i} and a span that starts at
 * {@code startPos + offsets[i]} and is as long as the token string.
 *
 * @param tokens
 *          - an array of tokens (Strings)
 * @param offsets
 *          - an array of integers (offsets), read at the same index as each token
 * @param startPos
 *          - starting position of the text, added to every offset
 * @return a {@link Tokenization} object holding one {@link Token} per input token
 */
public static Tokenization generateConcreteTokenization(String[] tokens, int[] offsets, int startPos) {
  final Tokenization result = new Tokenization();
  result.setKind(TokenizationKind.TOKEN_LIST);
  result.setMetadata(new AnnotationMetadata(tiftMetadata));
  result.setUuid(UUIDFactory.newUUID());

  final TokenList tokenList = new TokenList();
  // Note: we use token index as token id.
  int index = 0;
  for (final String text : tokens) {
    final int begin = startPos + offsets[index];
    final Token concreteToken = new Token();
    concreteToken.setTextSpan(new TextSpan(begin, begin + text.length()));
    concreteToken.setText(text);
    concreteToken.setTokenIndex(index);
    tokenList.addToTokenList(concreteToken);
    index++;
  }

  result.setTokenList(tokenList);
  return result;
}
 
开发者ID:hltcoe,项目名称:concrete-java,代码行数:33,代码来源:ConcreteTokenization.java

示例8: mockCommunication

import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Builds a test {@link Communication} with one section holding one whitespace-tokenized
 * sentence; both are built from a span running from 0 to {@code text.length()}.
 *
 * @param id the communication ID
 * @param text the communication text
 * @return the assembled communication, with empty {@link CommunicationMetadata} attached
 * @throws ConcreteException declared by the signature; not thrown directly in this body
 */
private Communication mockCommunication(String id, String text) throws ConcreteException {
  final Communication comm = new Communication();
  comm.setId(id);
  comm.setUuid(this.g.next());
  comm.setText(text);
  comm.setCommunicationMetadata(new CommunicationMetadata());

  final TextSpan fullSpan = new TextSpan(0, text.length());

  final Sentence sentence = new Sentence();
  sentence.setTextSpan(fullSpan);
  sentence.setUuid(g.next());
  sentence.setTokenization(Tokenizer.WHITESPACE.tokenizeToConcrete(text));

  final Section passage = new SectionFactory(g).fromTextSpan(fullSpan, "passage");
  passage.addToSentenceList(sentence);
  comm.addToSectionList(passage);

  return comm;
}
 
开发者ID:hltcoe,项目名称:concrete-java,代码行数:21,代码来源:PretokenizedEndToEndTest.java

示例9: mockCommunication

import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Builds a test {@link Communication} authored by the given Twitter user, with one section
 * holding one untokenized sentence; both are built from a span running from 0 to
 * {@code text.length()}.
 *
 * @param id the communication ID
 * @param text the communication text
 * @param authorId the ID set on the {@link TwitterUser} stored in the tweet metadata
 * @return the assembled communication
 * @throws ConcreteException declared by the signature; not thrown directly in this body
 */
private Communication mockCommunication(String id, String text, long authorId) throws ConcreteException {
  final TwitterUser author = new TwitterUser();
  author.setId(authorId);
  final TweetInfo tweet = new TweetInfo();
  tweet.setUser(author);
  final CommunicationMetadata metadata = new CommunicationMetadata();
  metadata.setTweetInfo(tweet);

  final Communication comm = new Communication();
  comm.setId(id);
  comm.setUuid(this.g.next());
  comm.setText(text);
  comm.setCommunicationMetadata(metadata);

  final TextSpan fullSpan = new TextSpan(0, text.length());

  final Sentence sentence = new Sentence();
  sentence.setTextSpan(fullSpan);
  sentence.setUuid(g.next());

  final Section passage = new SectionFactory(g).fromTextSpan(fullSpan, "passage");
  passage.addToSentenceList(sentence);
  comm.addToSectionList(passage);

  return comm;
}
 
开发者ID:hltcoe,项目名称:concrete-java,代码行数:25,代码来源:EndToEndTest.java

示例10: concreteSectionToCoreMapList

import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Converts each {@link Sentence} of a Concrete {@link Section} into a {@link CoreMap} carrying
 * the sentence index, character offsets, text, token labels and token begin/end positions.
 *
 * Token begin/end positions are cumulative over the section: each sentence starts where the
 * previous one ended.
 *
 * @param sect the section whose sentences are converted
 * @param commText the text that the sentence spans index into
 * @return one {@link CoreMap} per sentence, in sentence-list order
 */
public static List<CoreMap> concreteSectionToCoreMapList(final Section sect, final String commText) {
  final List<CoreMap> coreMaps = new ArrayList<>();
  int sentenceIndex = 0;
  int nextTokenBegin = 0;

  for (final Sentence sentence : sect.getSentenceList()) {
    final CoreMap sentenceMap = new ArrayCoreMap();
    sentenceMap.set(SentenceIndexAnnotation.class, sentenceIndex);

    final TextSpan span = sentence.getTextSpan();
    final int charBegin = span.getStart();
    final int charEnd = span.getEnding();
    LOGGER.debug("Setting stanford sentence BeginChar = {}", charBegin);
    sentenceMap.set(CharacterOffsetBeginAnnotation.class, charBegin);
    LOGGER.debug("Setting stanford sentence EndChar = {}", charEnd);
    sentenceMap.set(CharacterOffsetEndAnnotation.class, charEnd);

    final String sentenceText = commText.substring(charBegin, charEnd);
    LOGGER.debug("Setting text: {}", sentenceText);
    sentenceMap.set(TextAnnotation.class, sentenceText);

    // The sentence's start offset is passed as the offset subtracted from token starts.
    final List<CoreLabel> labels =
        tokenizationToCoreLabelList(sentence.getTokenization(), sentenceIndex, charBegin);
    LOGGER.debug("Setting stanford sentence token begin: {}", nextTokenBegin);
    sentenceMap.set(TokenBeginAnnotation.class, nextTokenBegin);
    final int tokenEnd = nextTokenBegin + labels.size();
    LOGGER.debug("Setting stanford sentence token end: {}", tokenEnd);
    sentenceMap.set(TokenEndAnnotation.class, tokenEnd);
    sentenceMap.set(TokensAnnotation.class, labels);

    coreMaps.add(sentenceMap);
    nextTokenBegin = tokenEnd;
    sentenceIndex++;
  }

  return coreMaps;
}
 
开发者ID:hltcoe,项目名称:concrete-stanford-deprecated2,代码行数:36,代码来源:ConcreteToStanfordMapper.java

示例11: toConcreteToken

import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Builds a Concrete {@link Token} from this wrapper's sentence offsets, index and text.
 *
 * @param cOffset the offset passed to {@code TextSpanFactory.withOffset} together with this
 *          wrapper's start and end sentence offsets
 * @return a token whose index is this wrapper's index minus one
 * @throws AnalyticException if the resulting token index is negative
 */
public Token toConcreteToken(int cOffset) throws AnalyticException {
  final TextSpan span =
      TextSpanFactory.withOffset(this.startSentenceOffset, this.endSentenceOffset, cOffset);
  LOGGER.debug("Creating concrete token text span: {}", span);

  final int stanfordIndex = this.getIndex();
  final int concreteIndex = stanfordIndex - 1;
  if (concreteIndex < 0) {
    throw new AnalyticException(
        "The concrete token index was somehow less than 0. Original index: " + stanfordIndex);
  }

  final Token token = new Token(concreteIndex);
  token.setTextSpan(span);
  // might be null (?)
  final String tokenText = this.text;
  LOGGER.debug("Setting Concrete token text: {}", tokenText);
  token.setText(tokenText);
  return token;
}
 
开发者ID:hltcoe,项目名称:concrete-stanford-deprecated2,代码行数:16,代码来源:TokenizedCoreLabelWrapper.java

示例12: validateTextSpan

import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Validates an optional {@link TextSpan}.
 *
 * @param ts the span to check, possibly absent
 * @return {@code true} when no span is present; otherwise the result of
 *         {@code ValidatableTextSpan.isValid()} for the contained span
 */
private static boolean validateTextSpan(Optional<TextSpan> ts) {
  // An absent span counts as valid; only a present one is actually checked.
  return !ts.isPresent() || new ValidatableTextSpan(ts.get()).isValid();
}
 
开发者ID:hltcoe,项目名称:concrete-java,代码行数:8,代码来源:ValidatableToken.java

示例13: testCreate

import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Checks that a {@code MiscSentence} created from a sentence spanning all of {@code txt}
 * reports "hello world!" as the text of its span.
 */
@Test
public void testCreate() {
  final Sentence sentence = new Sentence();
  sentence.setUuid(UUIDFactory.newUUID());
  sentence.setTextSpan(new TextSpan(0, txt.length()));

  final MiscSentence wrapped = MiscSentence.create(sentence, "foo", txt);
  final String spanText = wrapped.getTextSpan().get().getText().getContent();
  assertEquals("hello world!", spanText);
}
 
开发者ID:hltcoe,项目名称:concrete-java,代码行数:10,代码来源:MiscSentenceTest.java

示例14: getSentencedCommunication

import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Takes the communication from {@code getNoSentenceCommunication()} and adds one sentence to
 * the first section its iterator returns, using a copy of that section's {@link TextSpan}.
 *
 * @return the communication with the sentence added
 * @throws ConcreteException declared by the signature; not thrown directly in this body
 */
private Communication getSentencedCommunication() throws ConcreteException {
  final Communication comm = this.getNoSentenceCommunication();
  final Sentence sentence = new SentenceFactory(comm).create();

  final Section firstSection = comm.getSectionListIterator().next();
  // Copy the span so the sentence does not share the section's TextSpan instance.
  sentence.setTextSpan(new TextSpan(firstSection.getTextSpan()));
  firstSection.addToSentenceList(sentence);

  return comm;
}
 
开发者ID:hltcoe,项目名称:concrete-java,代码行数:10,代码来源:NoEmptySentenceListOrTokenizedCommunicationTest.java

示例15: convert

import edu.jhu.hlt.concrete.TextSpan; //导入依赖的package包/类
/**
 * Converts a Concrete {@link TextSpan} into an optional {@link FlatTextSpan}.
 *
 * @param ts the span to convert; may be {@code null}
 * @return an empty {@link Optional} for {@code null} input, otherwise a {@link FlatTextSpan}
 *         built from the span's start and ending
 */
private static final Optional<FlatTextSpan> convert(TextSpan ts) {
  if (ts == null) {
    return Optional.empty();
  }

  final FlatTextSpan flat = new FlatTextSpan.Builder()
      .setStart(ts.getStart())
      .setEnd(ts.getEnding())
      .build();
  return Optional.of(flat);
}
 
开发者ID:hltcoe,项目名称:concrete-java,代码行数:10,代码来源:FromConcrete.java


注:本文中的edu.jhu.hlt.concrete.TextSpan类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。