当前位置: 首页>>代码示例>>Java>>正文


Java Sentence类代码示例

本文整理汇总了Java中edu.stanford.nlp.ling.Sentence的典型用法代码示例。如果您正苦于以下问题:Java Sentence类的具体用法?Java Sentence怎么用?Java Sentence使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


Sentence类属于edu.stanford.nlp.ling包,在下文中一共展示了Sentence类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: buildDependeciesTrees

import edu.stanford.nlp.ling.Sentence; //导入依赖的package包/类
/**
 * Builds one dependency tree per input text.
 *
 * @param texts input texts, one sentence each, tokens separated by single spaces
 * @return a DependencyTree per input text, in input order
 */
public ArrayList<DependencyTree> buildDependeciesTrees(ArrayList<String> texts) {
  ArrayList<DependencyTree> result = new ArrayList<DependencyTree>();
  int counter = 1;
  for (String current : texts) {
    // progress indicator: 1-based index of the text being parsed
    System.out.println(counter);
    String[] tokens = current.split(" ");
    Tree parseTree = this.lp.apply(Sentence.toWordList(tokens));
    GrammaticalStructure structure = this.gsf.newGrammaticalStructure(parseTree);
    Collection<TypedDependency> dependencies = structure.typedDependenciesCCprocessed();
    DependencyTree currentTree = new DependencyTree();
    for (TypedDependency dependency : dependencies) {
      // encode each dependency as relation^^^governor^^^dependent
      String encoded = dependency.reln().toString() + "^^^"
          + dependency.gov().toString() + "^^^"
          + dependency.dep().toString();
      currentTree.addDependency(encoded);
    }
    counter++;
    result.add(currentTree);
  }

  return result;
}
 
开发者ID:dkmfbk,项目名称:pikes,代码行数:24,代码来源:DependenciesBuilder.java

示例2: getPCFGScore

import edu.stanford.nlp.ling.Sentence; //导入依赖的package包/类
/**
 * Parses a sentence and returns the PCFG score as a confidence measure.
 *
 * @param sentence a sentence
 * @return PCFG score
 */
@SuppressWarnings("unchecked")
public static double getPCFGScore(String sentence) {
       if (tlp == null || parser == null) {
           throw new RuntimeException("Parser has not been initialized");
       }

       log.debug("Parsing sentence");
       double result;
       // the shared parser instance is guarded; only one thread may parse at a time
       synchronized (parser) {
           Tokenizer tok = tlp.getTokenizerFactory().getTokenizer(new StringReader(sentence));
           List<Word> tokenized = tok.tokenize();
           log.debug("Tokenization: " + tokenized);
           parser.parse(new Sentence(tokenized));
           result = parser.getPCFGScore();
       }

       return result;
}
 
开发者ID:claritylab,项目名称:lucida,代码行数:25,代码来源:StanfordParser.java

示例3: tokenize

import edu.stanford.nlp.ling.Sentence; //导入依赖的package包/类
/**
 * Splits the sentence into individual tokens.
 * 
 * @param sentence Input sentence
 * @return Array of tokens
 */
public static String[] tokenize(String sentence) {
	// tokenizeText returns a raw List of Sentence objects (old Stanford API),
	// hence the per-element cast below
	List t = MaxentTagger.tokenizeText(new StringReader(sentence));
	
	List<String> tokens = new ArrayList<String>();
	
	for (Object o : t) {
		Sentence s1 = (Sentence) o;
		
		for (int i = 0; i < s1.length(); i++) {
			HasWord w = s1.getHasWord(i);
			tokens.add(w.word());
		}
	}
	
	// toArray(new String[...]) already returns String[]; no cast needed
	return tokens.toArray(new String[tokens.size()]);
}
 
开发者ID:claritylab,项目名称:lucida,代码行数:23,代码来源:StanfordPosTagger.java

示例4: tagPos

import edu.stanford.nlp.ling.Sentence; //导入依赖的package包/类
/**
 * Tags the tokens with part of speech
 * 
 * @param tokens Array of token strings
 * @return Part of speech tags
 */
public static String[] tagPos(String[] tokens) {
	Sentence tagged = MaxentTagger.tagSentence(createSentence(tokens));
	
	String[] pos = new String[tagged.size()];
	for (int i = 0; i < tagged.size(); i++) {
		// tagged words print as "word/TAG"; the tag is the last slash-separated part
		String[] parts = ((HasWord) tagged.get(i)).toString().split("/");
		pos[i] = parts.length > 1 ? parts[parts.length - 1] : "";
	}
	
	return pos;
}
 
开发者ID:claritylab,项目名称:lucida,代码行数:23,代码来源:StanfordPosTagger.java

示例5: getPCFGScore

import edu.stanford.nlp.ling.Sentence; //导入依赖的package包/类
/**
 * Parses a sentence and returns the PCFG score as a confidence measure.
 * 
 * @param sentence
 *            a sentence
 * @return PCFG score
 */
@SuppressWarnings("unchecked")
public static double getPCFGScore(String sentence)
{
    if (tlp == null || parser == null)
    {
        throw new RuntimeException("Parser has not been initialized");
    }

    log.debug("Parsing sentence");
    double pcfgScore;
    // serialize access: the shared parser instance handles one sentence at a time
    synchronized (parser)
    {
        Tokenizer sentenceTokenizer = tlp.getTokenizerFactory().getTokenizer(
            new StringReader(sentence));
        List<Word> tokenList = sentenceTokenizer.tokenize();
        log.debug("Tokenization: " + tokenList);
        parser.parse(new Sentence(tokenList));
        pcfgScore = parser.getPCFGScore();
    }

    return pcfgScore;
}
 
开发者ID:TScottJ,项目名称:OpenEphyra,代码行数:29,代码来源:StanfordParser.java

示例6: myExtractor

import edu.stanford.nlp.ling.Sentence; //导入依赖的package包/类
/**
 * Collects the tagged words of {@code t} into {@code ty} via a left-to-right
 * depth-first traversal, and returns the same sentence object.
 */
private static Sentence<TaggedWord> myExtractor(Tree t, Sentence<TaggedWord> ty) {
  Tree[] children = t.children();
  // inlined t.isPreTerminal(): exactly one child, and that child is a leaf
  if (children.length == 1 && children[0].isLeaf()) {
    String word = children[0].label().value();
    // use the explicit tag when the label carries one; otherwise fall back
    // to the label's own value as the tag
    String tag = (t.label() instanceof HasTag)
        ? ((HasTag) t.label()).tag()
        : t.label().value();
    ty.add(new TaggedWord(word, tag));
  } else {
    for (Tree child : children) {
      myExtractor(child, ty);
    }
  }
  return ty;
}
 
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:19,代码来源:TaggingEval.java

示例7: yield

import edu.stanford.nlp.ling.Sentence; //导入依赖的package包/类
/**
 * Gets the yield of the tree.  The <code>Label</code> of all leaf nodes
 * is returned as a list ordered by the natural left to right order of the
 * leaves.  Null values, if any, are inserted into the list like any
 * other value.
 * <p><i>Implementation notes:</i> threaded through a single Sentence so only
 * one list is ever allocated.  This variant gives type safety to Sentence:
 * any leaf whose Label does not itself implement HasWord is wrapped in a
 * Word, so the Sentence elements MUST implement HasWord.
 *
 * @param y The list in which the yield of the tree will be placed.
 *          Normally, this will be empty when the routine is called, but
 *          if not, the new yield is added to the end of the list.
 * @return a <code>List</code> of the data in the tree's leaves.
 */
@SuppressWarnings("unchecked")
public <X extends HasWord> Sentence<X> yield(Sentence<X> y) {
  if (!isLeaf()) {
    // interior node: recurse left to right
    for (Tree kid : children()) {
      kid.yield(y);
    }
    return y;
  }
  Label lab = label();
  // leaves implementing HasWord go in directly; anything else is wrapped
  // in a Word so the Sentence contract (elements implement HasWord) holds
  y.add(lab instanceof HasWord ? (X) lab : (X) new Word(lab));
  return y;
}
 
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:36,代码来源:Tree.java

示例8: Annotation

import edu.stanford.nlp.ling.Sentence; //导入依赖的package包/类
/**
 * Builds a document-level Annotation from pre-built sentences: records the
 * sentence list, concatenates their tokens, and assembles the document text.
 *
 * @param sentences the sentences making up the document
 */
public Annotation(List<CoreMap> sentences) {
  super();
  this.set(CoreAnnotations.SentencesAnnotation.class, sentences);
  List<CoreLabel> tokens = new ArrayList<CoreLabel>();
  StringBuilder text = new StringBuilder();
  for (CoreMap sentence : sentences) {
    List<CoreLabel> sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    // guard before addAll: a sentence without tokens returns null here, and
    // the original unconditionally dereferenced it
    if (sentenceTokens != null) {
      tokens.addAll(sentenceTokens);
    }
    // the fallback below is about missing *text*, so check TextAnnotation
    // (the original tested TokensAnnotation, making the fallback unreachable)
    if (sentence.containsKey(CoreAnnotations.TextAnnotation.class)) {
      text.append(sentence.get(CoreAnnotations.TextAnnotation.class));
    } else {
      // If there is no text in the sentence, fake it as best as we can
      if (text.length() > 0) {
        text.append("\n");
      }
      text.append(Sentence.listToString(sentenceTokens));
    }
  }
  this.set(CoreAnnotations.TokensAnnotation.class, tokens);
  this.set(CoreAnnotations.TextAnnotation.class, text.toString());
}
 
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:22,代码来源:Annotation.java

示例9: printSentences

import edu.stanford.nlp.ling.Sentence; //导入依赖的package包/类
/**
 * Writes each sentence to {@code filename} wrapped in {@code <s> ... </s>}
 * markers, warning (but still printing) when a sentence exceeds
 * {@code maxSentenceLength}.
 *
 * @param sentences the sentences to print
 * @param filename  output file path
 */
public void printSentences(Iterable<List<? extends HasWord>> sentences, String filename)
{
  // try-with-resources: the writer is closed even if printing throws
  // (the original leaked it on any exception before pw.close())
  try (PrintWriter pw = IOUtils.getPrintWriter(filename)) {
    for (List<? extends HasWord> sentence:sentences) {
      pw.print("<s> ");   // Note: Use <s sentence-id > to identify sentences
      String sentString = Sentence.listToString(sentence);
      if (sentence.size() > maxSentenceLength) {
        logger.warning("Sentence length=" + sentence.size() +
                " is longer than maximum set length " + maxSentenceLength);
        logger.warning("Long Sentence: " + sentString);
      }
      pw.print(sentString);
      pw.println(" </s>");
    }
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}
 
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:21,代码来源:CharniakParser.java

示例10: restoreOriginalWords

import edu.stanford.nlp.ling.Sentence; //导入依赖的package包/类
/**
 * Restores the original word labels onto the leaves of {@code tree}.
 * A no-op when either the original sentence or the tree is missing; throws
 * if their sizes disagree.
 */
@Override
public void restoreOriginalWords(Tree tree) {
  if (originalSentence == null || tree == null) {
    return;
  }
  List<Tree> leaves = tree.getLeaves();
  int expected = originalSentence.size();
  if (leaves.size() != expected) {
    throw new IllegalStateException("originalWords and sentence of different sizes: " + expected + " vs. " + leaves.size() +
                                    "\n Orig: " + Sentence.listToString(originalSentence) + 
                                    "\n Pars: " + Sentence.listToString(leaves));
  }
  // walk originals and leaves in lockstep, left to right
  Iterator<? extends Label> originalLabels = (Iterator<? extends Label>) originalSentence.iterator();
  for (Tree leaf : leaves) {
    leaf.setLabel(originalLabels.next());
  }
}
 
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:17,代码来源:LexicalizedParserQuery.java

示例11: doOneSentence

import edu.stanford.nlp.ling.Sentence; //导入依赖的package包/类
/**
 * Parses a single sentence under the given parser constraints.
 *
 * @param constraints constraints to impose on the parse
 * @param words the tokens of the sentence
 * @return the best parse tree, or {@code null} if parsing ran out of memory
 *         (or if {@code getBestParse} threw before the tree was assigned)
 */
private Tree doOneSentence(List<ParserConstraint> constraints, 
                           List<CoreLabel> words) {
  ParserQuery pq = parser.parserQuery();
  pq.setConstraints(constraints);
  pq.parse(words);
  Tree tree = null;
  try {
    tree = pq.getBestParse();
    // -10000 denotes unknown words
    // NOTE(review): the modulo folds the PCFG score into (-10000, 0];
    // presumably it strips a -10000-per-unknown-word marker from the raw
    // score — confirm against the parser's scoring convention.
    tree.setScore(pq.getPCFGScore() % -10000.0);
  } catch (OutOfMemoryError e) {
    // Deliberate best-effort: an oversized sentence must not kill the whole
    // annotation run, so warn and fall through to return null.
    System.err.println("WARNING: Parsing of sentence ran out of memory.  " +
                       "Will ignore and continue: " +
                       Sentence.listToString(words));
  }
  return tree;
}
 
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:18,代码来源:ParserAnnotator.java

示例12: parse_tokens

import edu.stanford.nlp.ling.Sentence; //导入依赖的package包/类
/**
 * @param tokens One sentence worth of tokens at a time.
 * @param outputFormat tree-print options forwarded to ParserUtil.setOptions
 * @return A ParseTree object of the String representation of the tree, plus its probability.
 * @throws TApplicationException if parsing or formatting fails
 */
public ParseTree parse_tokens(List<String> tokens, List<String> outputFormat) throws TApplicationException
{
    try
    {
        treePrinter = ParserUtil.setOptions(outputFormat, tlp);

        // a single sentence worth of tokens
        String[] tokenArray = tokens.toArray(new String[0]);
        List<CoreLabel> crazyStanfordFormat = Sentence.toCoreLabelList(tokenArray);
        Tree parseTree = parser.apply(crazyStanfordFormat);
        return new ParseTree(ParserUtil.TreeObjectToString(parseTree, treePrinter), parseTree.score());
    }
    catch (Exception e)
    {
        // e.toString() rather than e.getMessage(): getMessage() can be null
        // (e.g. for NullPointerException), which would hide the failure cause
        // since TApplicationException carries no exception chain.
        throw new TApplicationException(TApplicationException.INTERNAL_ERROR, e.toString());
    }
}
 
开发者ID:dmnapolitano,项目名称:stanford-thrift,代码行数:25,代码来源:StanfordParserThrift.java

示例13: main

import edu.stanford.nlp.ling.Sentence; //导入依赖的package包/类
/**
 * POS-tags the given file and prints one tagged sentence per line to stdout.
 * Usage: java TaggerDemo modelFile fileToTag
 *
 * @param args [0] = tagger model file, [1] = UTF-8 text file to tag
 */
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("usage: java TaggerDemo modelFile fileToTag");
    return;
  }
  MaxentTagger tagger = new MaxentTagger(args[0]);
  TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
							   "untokenizable=noneKeep");
  // try-with-resources: both streams are closed even if tagging throws
  // (the original closed pw only on success and never closed r)
  try (BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
       PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"))) {
    DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
    documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
    for (List<HasWord> sentence : documentPreprocessor) {
      List<TaggedWord> tSentence = tagger.tagSentence(sentence);
      pw.println(Sentence.listToString(tSentence, false));
    }
  }
}
 
开发者ID:jaimeguzman,项目名称:data_mining,代码行数:19,代码来源:TaggerDemo2.java

示例14: main

import edu.stanford.nlp.ling.Sentence; //导入依赖的package包/类
/**
 * POS-tags the given file and prints each tagged sentence to stdout.
 * Usage: java TaggerDemo modelFile fileToTag
 *
 * @param args [0] = tagger model file, [1] = text file to tag
 */
public static void main(String[] args) throws Exception{
	if (args.length != 2) {
		System.err.println("usage: java TaggerDemo modelFile fileToTag");
		return;
	}
	MaxentTagger tagger = new MaxentTagger(args[0]);
	// try-with-resources: the input file is closed even if tokenization throws
	// (the original never closed the reader)
	// NOTE(review): FileReader uses the platform default charset — confirm
	// it matches the input file's encoding, or use an explicit charset.
	List<List<HasWord>> sentences;
	try (BufferedReader reader = new BufferedReader(new FileReader(args[1]))) {
		sentences = MaxentTagger.tokenizeText(reader);
	}
	for (List<HasWord> sentence : sentences) {
		ArrayList<TaggedWord> tSentence = tagger.tagSentence(sentence);
		System.out.println(Sentence.listToString(tSentence, false));
	}
}
 
开发者ID:jaimeguzman,项目名称:data_mining,代码行数:23,代码来源:testPostagger.java

示例15: countMWEStatistics

import edu.stanford.nlp.ling.Sentence; //导入依赖的package包/类
/**
 * Feeds {@code t} to the unigram tagger and accumulates label/yield
 * co-occurrence counts for every multi-word-expression node that the
 * {@code pMWE} pattern matches in the tree.
 */
static public void countMWEStatistics(Tree t,
    TwoDimensionalCounter<String, String> unigramTagger,
    TwoDimensionalCounter<String, String> labelPreterm,
    TwoDimensionalCounter<String, String> pretermLabel,
    TwoDimensionalCounter<String, String> labelTerm,
    TwoDimensionalCounter<String, String> termLabel) 
{
  updateTagger(unigramTagger, t);

  // Count MWE statistics
  TregexMatcher matcher = pMWE.matcher(t);
  while (matcher.findNextMatchingNode()) {
    Tree mwe = matcher.getMatch();
    String label = mwe.value();
    // skip placeholder phrasal labels when dummy-tag resolution is enabled
    if (RESOLVE_DUMMY_TAGS && label.equals(FrenchTreeReader.MISSING_PHRASAL)) {
      continue;
    }

    String preterm = Sentence.listToString(mwe.preTerminalYield());
    String term = Sentence.listToString(mwe.yield());

    labelPreterm.incrementCount(label, preterm);
    pretermLabel.incrementCount(preterm, label);
    labelTerm.incrementCount(label, term);
    termLabel.incrementCount(term, label);
  }
}
 
开发者ID:amark-india,项目名称:eventspotter,代码行数:27,代码来源:MWEPreprocessor.java


注:本文中的edu.stanford.nlp.ling.Sentence类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。