

Java Treebank Class Code Examples

This article collects typical usage examples of the Java class edu.stanford.nlp.trees.Treebank. If you are wondering what the Treebank class does, how to use it, or what code that uses it looks like, the curated class code examples below may help.


The Treebank class belongs to the edu.stanford.nlp.trees package. Eleven code examples of the Treebank class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
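
Before the individual examples, here is a minimal sketch of the most common Treebank workflow: load a treebank from disk, print a summary, and iterate over its trees. It mirrors the patterns used in Examples 6, 8, and 11 below; the class name TreebankDemo and the command-line path argument are placeholders chosen for illustration.

import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.Treebank;

public class TreebankDemo {
  public static void main(String[] args) {
    // DiskTreebank reads Penn-Treebank-style files lazily; MemoryTreebank would hold them in RAM.
    Treebank treebank = new DiskTreebank();
    treebank.loadPath(args[0]); // a treebank file or a directory of treebank files

    // Print a short statistical summary, then each parse tree in Penn Treebank notation.
    System.out.println(treebank.textualSummary());
    for (Tree tree : treebank) {
      tree.pennPrint();
    }
  }
}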

Example 1: getSegmentedWordLengthDistribution

import edu.stanford.nlp.trees.Treebank; // import the required package/class
private Distribution<Integer> getSegmentedWordLengthDistribution(Treebank tb) {
  // CharacterLevelTagExtender ext = new CharacterLevelTagExtender();
  ClassicCounter<Integer> c = new ClassicCounter<Integer>();
  for (Iterator iterator = tb.iterator(); iterator.hasNext();) {
    Tree gold = (Tree) iterator.next();
    StringBuilder goldChars = new StringBuilder();
    Sentence goldYield = gold.yield();
    for (Iterator wordIter = goldYield.iterator(); wordIter.hasNext();) {
      Word word = (Word) wordIter.next();
      goldChars.append(word);
    }
    Sentence ourWords = segmentWords(goldChars.toString());
    for (int i = 0; i < ourWords.size(); i++) {
      c.incrementCount(Integer.valueOf(ourWords.get(i).toString().length()));
    }
  }
  return Distribution.getDistribution(c);
}
 
Developer ID: FabianFriedrich, Project: Text2Process, Lines: 19, Source file: ChineseMarkovWordSegmenter.java

Example 2: getSegmentedWordLengthDistribution

import edu.stanford.nlp.trees.Treebank; // import the required package/class
private Distribution<Integer> getSegmentedWordLengthDistribution(Treebank tb) {
  // CharacterLevelTagExtender ext = new CharacterLevelTagExtender();
  ClassicCounter<Integer> c = new ClassicCounter<Integer>();
  for (Iterator iterator = tb.iterator(); iterator.hasNext();) {
    Tree gold = (Tree) iterator.next();
    StringBuilder goldChars = new StringBuilder();
    ArrayList goldYield = gold.yield();
    for (Iterator wordIter = goldYield.iterator(); wordIter.hasNext();) {
      Word word = (Word) wordIter.next();
      goldChars.append(word);
    }
    List<HasWord> ourWords = segment(goldChars.toString());
    for (int i = 0; i < ourWords.size(); i++) {
      c.incrementCount(Integer.valueOf(ourWords.get(i).word().length()));
    }
  }
  return Distribution.getDistribution(c);
}
 
Developer ID: amark-india, Project: eventspotter, Lines: 19, Source file: ChineseMarkovWordSegmenter.java

Example 3: simplify

import edu.stanford.nlp.trees.Treebank; // import the required package/class
public Treebank simplify(Tree tree) {

  Treebank totalSimplified = new MemoryTreebank();

  Queue<Tree> queue = new LinkedList<Tree>();
  queue.offer(tree);
  while (!queue.isEmpty()) {
    Tree t = queue.poll();

    boolean hasSimplification = false;

    int[] types = new int[] { Simplifier.Parenthesis, Simplifier.Coordination };

    for (int type : types) {
      hasSimplification = simplify(t, queue, totalSimplified, type);
      if (hasSimplification) {
        break;
      }
    }

    if (!hasSimplification) {
      if (t != tree) {
        totalSimplified.add(t);
      }
    }
  }
  return totalSimplified;
}
 
Developer ID: leebird, Project: legonlp, Lines: 31, Source file: GenerateParCooSimplification.java

Example 4: simplify

import edu.stanford.nlp.trees.Treebank; // import the required package/class
public Treebank simplify(Tree tree) {

  Treebank totalSimplified = new MemoryTreebank();

  Queue<Tree> queue = new LinkedList<Tree>();
  queue.offer(tree);
  while (!queue.isEmpty()) {
    Tree t = queue.poll();

    boolean hasSimplification = false;

    int[] types = new int[] { Simplifier.Parenthesis, Simplifier.Coordination,
        Simplifier.Relative, Simplifier.Apposition, Simplifier.Others };

    for (int type : types) {
      hasSimplification = simplify(t, queue, totalSimplified, type);
      if (hasSimplification) {
        break;
      }
    }

    if (!hasSimplification && t != tree) {
      totalSimplified.add(t);
    }
  }
  return totalSimplified;
}
 
Developer ID: leebird, Project: legonlp, Lines: 33, Source file: GenerateSimplification.java

Example 5: getCandidates

import edu.stanford.nlp.trees.Treebank; // import the required package/class
protected List<Entity> getCandidates(Entity entity, Treebank treebank, boolean intraSentence) {
  List<Entity> entityList = new ArrayList<Entity>();

  for (Tree tree : treebank) {
    List<Tree> leaves = tree.getLeaves();
    OffsetLabel first = (OffsetLabel) leaves.get(0).label();
    OffsetLabel last = (OffsetLabel) leaves.get(leaves.size() - 1).label();
    int start = first.beginPosition();
    int end = last.endPosition();

    TregexPattern np = TregexPattern.compile("NP|NNP|NNPS|NN|NNS");
    TregexMatcher m = np.matcher(tree);
    while (m.find()) {
      Tree npTree = m.getMatch();
      List<Token> tokens = Utils.getTokens(tree, npTree);

      if (!npTree.isLeaf()) {
        Entity candidate = new Entity("", npTree.nodeString(), tokens);

        if (entity.from() > candidate.to()) {
          if (intraSentence) {
            if ((entity.from() > start) && (entity.to() < end)) {
              entityList.add(candidate);
            }
          } else {
            entityList.add(candidate);
          }
        }
      }
    }
  }
  return entityList;
}
 
Developer ID: leebird, Project: legonlp, Lines: 36, Source file: ResoluteAnaphora.java

Example 6: main

import edu.stanford.nlp.trees.Treebank; // import the required package/class
public static void main(String[] args) {
  TreebankLangParserParams tlpp = new NegraPennTreebankParserParams();
  Treebank tb = tlpp.memoryTreebank();
  tb.loadPath(args[0]);
  for (Tree aTb : tb) {
    aTb.pennPrint();
  }
}
 
Developer ID: FabianFriedrich, Project: Text2Process, Lines: 9, Source file: NegraPennTreebankParserParams.java

Example 7: getTuningSet

import edu.stanford.nlp.trees.Treebank; // import the required package/class
private static List<FactoredLexiconEvent> getTuningSet(Treebank devTreebank,
    FactoredLexicon lexicon, TreebankLangParserParams tlpp) {
  List<Tree> devTrees = new ArrayList<Tree>(3000);
  for (Tree tree : devTreebank) {
    for (Tree subTree : tree) {
      if (!subTree.isLeaf()) {
        tlpp.transformTree(subTree, tree);
      }
    }
    devTrees.add(tree);
  }
  List<FactoredLexiconEvent> tuningSet = treebankToLexiconEvents(devTrees, lexicon);
  return tuningSet;
}
 
Developer ID: benblamey, Project: stanford-nlp, Lines: 15, Source file: FactoredLexicon.java

Example 8: main

import edu.stanford.nlp.trees.Treebank; // import the required package/class
public static void main(String[] args) {
  // simple testing code
  Treebank treebank = new DiskTreebank();
  CategoryWordTag.suppressTerminalDetails = true;
  treebank.loadPath(args[0]);
  final HeadFinder chf = new NoPunctuationHeadFinder();
  treebank.apply(new TreeVisitor() {
    public void visitTree(Tree pt) {
      pt.percolateHeads(chf);
      pt.pennPrint();
      System.out.println();
    }
  });
}
 
Developer ID: benblamey, Project: stanford-nlp, Lines: 15, Source file: NoPunctuationHeadFinder.java

Example 9: processFile

import edu.stanford.nlp.trees.Treebank; // import the required package/class
@Override
public final void processFile(String dir, String filename) {
  super.processFile(dir, filename);

  if (filename.equals("PMC-3062687-12-Methods")) {
    return;
  }

  readResource(dir, filename);

  if (index != -1) {
    MemoryTreebank newTreebank = new MemoryTreebank();
    newTreebank.add(treebank.get(index));
    treebank = newTreebank;
  }

  // general
  Treebank simpTreebank = new MemoryTreebank();
  for (Tree t : treebank) {
    simpTreebank.addAll(simplify(t));
  }

  // output
  try {
    PrintStream out = new PrintStream(new FileOutputStream(Env.DIR_SIMP
        + filename
        + ".ptb.simp"));
    Set<String> noDuplicates = new HashSet<String>();
    for (Tree tree : simpTreebank) {
      String line = tree.toString();
      if (!noDuplicates.contains(line)) {
        out.println(line);
        noDuplicates.add(line);
      }
    }
    out.close();
  } catch (FileNotFoundException e) {
    e.printStackTrace();
    System.exit(1);
  }
}
 
Developer ID: leebird, Project: legonlp, Lines: 43, Source file: GenerateSimplification.java

Example 10: recover

import edu.stanford.nlp.trees.Treebank; // import the required package/class
public String recover(String text, List<Entity> entityList,
    HashMap<String, HashMap<String, Integer>> map) {
  PtbReader ptbReader = new PtbReader(Env.DIR_PARSE + filename + ".ptb");
  Treebank treebank = ptbReader.readTreebank();
  String res = "";
  String pattern = Env.ENTITY_REPLACE;

  for (Tree t : treebank) {
    List<Tree> leaves = t.getLeaves();

    for (Tree l : leaves) {
      String word = Utils.adaptValue(l.label().toString());
      Pattern r = Pattern.compile(pattern);
      Matcher m = r.matcher(word);

      while (m.find()) {
        String needle = m.group();
        try {
          int entStart = map.get(needle).get("start");
          int entEnd = map.get(needle).get("end");

          for (Entity entity : entityList) {
            int start = entity.from();
            int end = entity.to();
            String entityText = entity.getText();

            if (start == entStart && entEnd == end) {
              word = word.replace(needle, entityText);
              m = r.matcher(word);
              break;
            }
          }
        } catch (Exception e) {
          System.out.println(needle);
          System.exit(1);
        }
      }
      l.setValue(word);
    }
    res += t.toString() + "\n";
  }
  return res;
}
 
Developer ID: leebird, Project: legonlp, Lines: 50, Source file: RecoverEntity.java

Example 11: main

import edu.stanford.nlp.trees.Treebank; // import the required package/class
/**
 * Execute with no arguments for usage.
 */
public static void main(String[] args) {

  if(!validateCommandLine(args)) {
    System.err.println(usage);
    System.exit(-1);
  }

  final TreebankLangParserParams tlpp = Languages.getLanguageParams(LANGUAGE);
  final PrintWriter pwOut = tlpp.pw();

  final Treebank guessTreebank = tlpp.diskTreebank();
  guessTreebank.loadPath(guessFile);
  pwOut.println("GUESS TREEBANK:");
  pwOut.println(guessTreebank.textualSummary());

  final Treebank goldTreebank = tlpp.diskTreebank();
  goldTreebank.loadPath(goldFile);
  pwOut.println("GOLD TREEBANK:");
  pwOut.println(goldTreebank.textualSummary());

  final LeafAncestorEval metric = new LeafAncestorEval("LeafAncestor");

  final TreeTransformer tc = tlpp.collinizer();

  //The evalb ref implementation assigns status for each tree pair as follows:
  //
  //   0 - Ok (yields match)
  //   1 - length mismatch
  //   2 - null parse e.g. (()).
  //
  //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
  final Iterator<Tree> goldItr = goldTreebank.iterator();
  final Iterator<Tree> guessItr = guessTreebank.iterator();
  int goldLineId = 0;
  int guessLineId = 0;
  int skippedGuessTrees = 0;
  while( guessItr.hasNext() && goldItr.hasNext() ) {
    Tree guessTree = guessItr.next();
    List<Label> guessYield = guessTree.yield();
    guessLineId++;

    Tree goldTree = goldItr.next();
    List<Label> goldYield = goldTree.yield();
    goldLineId++;

    // Check that we should evaluate this tree
    if(goldYield.size() > MAX_GOLD_YIELD) {
      skippedGuessTrees++;
      continue;
    }

    // Only trees with equal yields can be evaluated
    if(goldYield.size() != guessYield.size()) {
      pwOut.printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.size(), guessYield.size(), goldLineId, guessLineId);
      skippedGuessTrees++;
      continue;
    }
    
    final Tree evalGuess = tc.transformTree(guessTree);
    final Tree evalGold = tc.transformTree(goldTree);

    metric.evaluate(evalGuess, evalGold, ((VERBOSE) ? pwOut : null));
  }
  
  if(guessItr.hasNext() || goldItr.hasNext()) {
    System.err.printf("Guess/gold files do not have equal lengths (guess: %d gold: %d)%n.", guessLineId, goldLineId);
  }
  
  pwOut.println("================================================================================");
  if(skippedGuessTrees != 0) pwOut.printf("%s %d guess trees\n", "Unable to evaluate", skippedGuessTrees);
  metric.display(true, pwOut);
  pwOut.close();
}
 
Developer ID: benblamey, Project: stanford-nlp, Lines: 77, Source file: LeafAncestorEval.java


Note: The edu.stanford.nlp.trees.Treebank class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by many developers; copyright in the source code remains with the original authors. For distribution and use, please refer to the license of the corresponding project. Do not reproduce without permission.