当前位置: 首页>>代码示例>>Java>>正文


Java StringUtils类代码示例

本文整理汇总了Java中edu.stanford.nlp.util.StringUtils的典型用法代码示例。如果您正苦于以下问题：Java StringUtils类的具体用法？Java StringUtils怎么用？Java StringUtils使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


StringUtils类属于edu.stanford.nlp.util包,在下文中一共展示了StringUtils类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: omitPunct

import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Returns a copy of {@code str} in which every character that
 * {@code StringUtils.isPunct} reports as punctuation has been dropped.
 *
 * <p>A dropped character is replaced by a single space unless the previously
 * emitted character was already a space or nothing has been emitted yet, so a
 * run of punctuation yields at most one space and leading punctuation yields
 * none. Characters that are not punctuation (including spaces already present
 * in the input) are copied through unchanged.
 *
 * @param str the text to filter; must not be {@code null}
 * @return the filtered text
 */
public static String omitPunct(String str) {
  StringBuilder sb = new StringBuilder();
  // Starts as true so that punctuation at the very beginning adds no space.
  boolean lastIsSpace = true;
  for (int i = 0; i < str.length(); ++i) {
    char ch = str.charAt(i);
    // String.valueOf(char) replaces the deprecated new Character(ch).toString().
    if (!StringUtils.isPunct(String.valueOf(ch))) {
      sb.append(ch);
      lastIsSpace = (ch == ' ');
    } else if (!lastIsSpace) {
      sb.append(" ");
      lastIsSpace = true;
    }
  }
  return sb.toString();
}
 
开发者ID:cgraywang,项目名称:TextHIN,代码行数:19,代码来源:FileUtils.java

示例2: computeTopicSimilarity

import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Scores a concept against the description of one topic with every registered
 * similarity measure.
 *
 * <p>The measure table is created on first use. The topic description is joined
 * into a single string, wrapped in a temporary {@code Concept} and run through
 * {@code NonUIMAPreprocessor} before being compared with {@code c}.
 *
 * @param c     the concept to score
 * @param topic key used to look up the description in {@code topicDescriptions}
 * @return one (measure name, score) pair per registered measure
 */
private List<Pair<String, Double>> computeTopicSimilarity(Concept c, int topic) {

	// Lazily build the table of measures the first time it is needed.
	if (simMeasures == null) {
		simMeasures = new HashMap<>();
		simMeasures.put("topic_jaccard", new JaccardDistance());
		simMeasures.put("topic_wn", new WordBasedMeasure(WNSimMeasure.RES));
		simMeasures.put("topic_w2v", new WordEmbeddingDistance(EmbeddingType.WORD2VEC, 300, false));
	}

	// Turn the topic description into a preprocessed stand-in concept.
	String[] descriptionWords = this.topicDescriptions.get(topic);
	Concept rawTopicConcept = new Concept(StringUtils.join(descriptionWords));
	Concept topicConcept = NonUIMAPreprocessor.getInstance().preprocess(rawTopicConcept);

	List<Pair<String, Double>> result = new ArrayList<>();
	simMeasures.forEach((measureName, measure) -> {
		double value = Muter.callMuted(measure::computeSimilarity, c, topicConcept);
		result.add(new Pair<String, Double>(measureName, value));
	});
	return result;
}
 
开发者ID:UKPLab,项目名称:ijcnlp2017-cmaps,代码行数:21,代码来源:FeatureExtractor.java

示例3: main

import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Interactive console driver for trying out relation extraction.
 *
 * <p>Builds a CoreNLP pipeline from the command-line properties (with the
 * annotator list and the regexner mapping overridden below), then reads lines
 * from the console. For every sentence of a line, the KBP triples are written
 * to standard error and the sentence itself to standard output.
 *
 * @param args command-line arguments, converted to properties
 * @throws IOException declared for the console loop
 */
public static void main(String[] args) throws IOException {
  final Properties config = StringUtils.argsToProperties(args);
  config.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  config.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");

  final StanfordCoreNLP nlp = new StanfordCoreNLP(config);
  IOUtils.console("sentence> ", input -> {
    Annotation document = new Annotation(input);
    nlp.annotate(document);
    document.get(CoreAnnotations.SentencesAnnotation.class).forEach(sent -> {
      // Triples go to stderr, the sentence itself to stdout.
      for (Object triple : sent.get(CoreAnnotations.KBPTriplesAnnotation.class)) {
        System.err.println(triple);
      }
      System.out.println(sent);
    });
  });
}
 
开发者ID:intel-analytics,项目名称:InformationExtraction,代码行数:20,代码来源:KBPTest.java

示例4: main

import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Smoke test for relation extraction on one hard-coded sentence.
 *
 * <p>The sentence is annotated with a default-constructed pipeline, then a
 * {@code RelationExtractorAnnotator} built from the command-line properties is
 * applied, and every relation mention of every sentence is printed. Any
 * exception is caught and its stack trace printed.
 *
 * @param args command-line arguments, converted to properties for the relation extractor
 */
public static void main(String[] args){
  try {
    final Properties props = StringUtils.argsToProperties(args);
    // NOTE(review): the pipeline is built without props; an explicit annotator
    // list ("tokenize,ssplit,lemma,pos,parse,ner") was left disabled here.
    final StanfordCoreNLP pipeline = new StanfordCoreNLP();
    final String sentence = "John Gerspach was named Chief Financial Officer of Citi in July 2009.";
    final Annotation doc = new Annotation(sentence);
    pipeline.annotate(doc);

    final RelationExtractorAnnotator extractor = new RelationExtractorAnnotator(props);
    extractor.annotate(doc);

    for (CoreMap sent : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      String sentenceText = sent.get(CoreAnnotations.TextAnnotation.class);
      System.out.println("For sentence " + sentenceText);
      List<RelationMention> mentions = sent.get(RelationMentionsAnnotation.class);
      mentions.forEach(mention -> System.out.println(mention.toString()));
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
 
开发者ID:intel-analytics,项目名称:InformationExtraction,代码行数:23,代码来源:JavaReExTest.java

示例5: main

import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Interactive console driver that prints only selected KBP relations.
 *
 * <p>Builds a CoreNLP pipeline from the command-line properties (annotator list
 * and regexner mapping overridden below) and reads lines from the console. Each
 * KBP triple whose relation gloss is in the set of interesting relations is
 * written to standard error.
 *
 * @param args command-line arguments, converted to properties
 * @throws IOException declared for the console loop
 */
public static void main(String[] args) throws IOException {
    final Properties config = StringUtils.argsToProperties(args);
    config.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
    config.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");

    // Only triples with one of these relation glosses are reported.
    final Set<String> wanted = Stream.of("per:title", "per:employee_of", "org:top_members/employees")
            .collect(Collectors.toSet());

    final StanfordCoreNLP nlp = new StanfordCoreNLP(config);
    IOUtils.console("sentence> ", input -> {
        Annotation document = new Annotation(input);
        nlp.annotate(document);
        for (CoreMap sent : document.get(CoreAnnotations.SentencesAnnotation.class)) {
            sent.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(triple -> {
                if (!wanted.contains(triple.relationGloss())) {
                    return;
                }
                System.err.println(triple);
            });
        }
    });
}
 
开发者ID:intel-analytics,项目名称:InformationExtraction,代码行数:20,代码来源:InteractiveDriver.java

示例6: parse

import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Annotates {@code text} with the shared preprocessing pipeline and prints one
 * line per named-entity mention together with the label predicted by {@code sys}.
 *
 * @param sys    the system asked to predict a label for each mention
 * @param lineId identifier of the input line; only used in the printed output
 * @param text   the raw text to annotate
 */
public static void parse(FigerSystem sys, int lineId, String text) {
	Annotation annotation = new Annotation(text);
	Preprocessing.pipeline.annotate(annotation);

	// Running index of the sentence inside this annotation.
	int sentId = 0;
	for (CoreMap sentence : annotation.get(SentencesAnnotation.class)) {
		for (Pair<Integer, Integer> span : getNamedEntityMentions(sentence)) {
			Integer begin = span.first;
			Integer end = span.second;
			String label = sys.predict(annotation, sentId, begin, end);
			// Surface text of the mention, rebuilt from its tokens.
			String mention = StringUtils.joinWithOriginalWhiteSpace(
					sentence.get(TokensAnnotation.class).subList(begin, end));

			StringBuilder out = new StringBuilder();
			out.append("[l").append(lineId).append("][s").append(sentId).append("]mention");
			out.append(mention).append("(").append(begin).append(",").append(end).append(") = ");
			out.append(mention).append(", pred = ").append(label);
			System.out.println(out.toString());
		}
		sentId++;
	}
}
 
开发者ID:zhangcongle,项目名称:NewsSpikeRe,代码行数:25,代码来源:FigerSystem.java

示例7: parse

import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Annotates {@code text} with the shared preprocessing pipeline and, for each
 * named-entity mention found, prints the mention, its token offsets and the
 * label returned by {@code sys.predict}.
 *
 * @param sys    the system asked to predict a label for each mention
 * @param lineId identifier of the input line; only used in the printed output
 * @param text   the raw text to annotate
 */
public static void parse(ParseStanfordFigerReverb sys, int lineId, String text) {
	Annotation annotation = new Annotation(text);
	Preprocessing.pipeline.annotate(annotation);

	int sentId = 0;
	for (CoreMap sentence : annotation.get(SentencesAnnotation.class)) {
		List<Pair<Integer, Integer>> mentionSpans = getNamedEntityMentions(sentence);
		for (Pair<Integer, Integer> span : mentionSpans) {
			String predicted = sys.predict(annotation, sentId, span.first, span.second);
			String surface = StringUtils.joinWithOriginalWhiteSpace(
					sentence.get(TokensAnnotation.class).subList(span.first, span.second));

			// Assemble the report line piece by piece.
			String location = "[l" + lineId + "][s" + sentId + "]mention";
			String offsets = "(" + span.first + "," + span.second + ") = ";
			String verdict = ", pred = " + predicted;
			System.out.println(location + surface + offsets + surface + verdict);
		}
		sentId++;
	}
}
 
开发者ID:zhangcongle,项目名称:NewsSpikeRe,代码行数:22,代码来源:ParseStanfordFigerReverb.java

示例8: nearestFile

import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Finds the file name in {@code directory} with the smallest edit distance to
 * {@code name}, comparing case-insensitively.
 *
 * <p>Both {@code name} and each candidate are lower-cased with
 * {@code Locale.ROOT} so that the comparison does not depend on the JVM's
 * default locale. Candidates whose lower-cased name contains
 * {@code badSubstring} are skipped. On equal distance the first candidate
 * returned by {@code getFiles} wins.
 *
 * <p>NOTE: the returned name is the lower-cased form, not the original spelling.
 * If {@code directory} is not a directory, a message is printed and the JVM
 * exits with status -1.
 *
 * @param name         the name to match; must not be {@code null}
 * @param directory    path of the directory to search
 * @param badSubstring substring that disqualifies a candidate, or {@code null}
 *                     to accept every candidate
 * @return the lower-cased best match, or {@code null} if no candidate qualified
 */
public static String nearestFile(String name, String directory, String badSubstring) {
  name = name.toLowerCase(java.util.Locale.ROOT);
  File dir = new File(directory);
  if (!dir.isDirectory()) {
    System.out.println("(Directory) Not a directory: " + dir);
    System.exit(-1);
    return null;
  }

  float best = Float.MAX_VALUE;
  String bestName = null;
  for (String file : getFiles(dir)) {
    String candidate = file.toLowerCase(java.util.Locale.ROOT);
    float editscore = StringUtils.editDistance(name, candidate);
    boolean allowed = badSubstring == null || !candidate.contains(badSubstring);
    // Strict '<' keeps the first candidate when several share the best score.
    if (editscore < best && allowed) {
      best = editscore;
      bestName = candidate;
    }
  }
  return bestName;
}
 
开发者ID:nchambers,项目名称:schemas,代码行数:24,代码来源:Directory.java

示例9: main

import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Runs Phrasal from the command line.
 *
 * <p>The argument stored under the empty key is taken as the configuration file
 * name. If neither options nor a configuration file are given, or if
 * {@code help} / {@code h} is requested, the usage text is printed and the JVM
 * exits with status -1. Otherwise the decoder is loaded and input is decoded
 * from the file named by the {@code text} option, or from standard input when
 * that option is absent.
 *
 * @param args command-line arguments, converted to properties
 * @throws Exception if configuration, decoder loading or decoding fails
 */
public static void main(String[] args) throws Exception {
  final Properties options = StringUtils.argsToProperties(args);
  final String configFile = options.containsKey("") ? (String) options.get("") : null;
  options.remove("");
  if ((options.size() == 0 && configFile == null) || options.containsKey("help") || options.containsKey("h")) {
    System.err.println(usage());
    System.exit(-1);
  }

  // by default, exit on uncaught exception
  Thread.setDefaultUncaughtExceptionHandler((t, ex) -> {
    logger.fatal("Uncaught top-level exception", ex);
    System.exit(-1);
  });

  final Map<String, List<String>> configuration = getConfigurationFrom(configFile, options);
  final Phrasal p = Phrasal.loadDecoder(configuration);

  if (options.containsKey("text")) {
    // Close the input file once decoding finishes, even if decode() throws.
    try (FileInputStream textInput = new FileInputStream(new File(options.getProperty("text")))) {
      p.decode(textInput, true);
    }
  } else {
    p.decode(System.in, true);
  }
}
 
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:28,代码来源:Phrasal.java

示例10: main

import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Command-line entry point for training and evaluating the German postprocessor.
 *
 * <p>Prints the usage text and exits with status -1 when no arguments are given
 * or {@code help} is requested. The thread count is read from the
 * {@code nthreads} option and defaults to 1.
 *
 * @param args command-line options
 */
public static void main(String[] args) {
  // Strips off hyphens
  final Properties opts = StringUtils.argsToProperties(args, optionArgDefs());
  final boolean showUsage = args.length == 0 || opts.containsKey("help");
  if (showUsage) {
    System.err.println(usage(GermanPostprocessor.class.getName()));
    System.exit(-1);
  }

  final int threadCount = PropertiesUtils.getInt(opts, "nthreads", 1);
  final GermanPreprocessor pre = new GermanPreprocessor();
  final GermanPostprocessor post = new GermanPostprocessor(opts);

  CRFPostprocessor.setup(post, pre, opts);
  CRFPostprocessor.execute(threadCount, pre, post);
}
 
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:21,代码来源:GermanPostprocessor.java

示例11: main

import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Command-line entry point for training and evaluating the French postprocessor.
 *
 * <p>Prints the usage text and exits with status -1 when no arguments are given
 * or {@code help} is requested. The thread count is read from the
 * {@code nthreads} option and defaults to 1.
 *
 * @param args command-line options
 */
public static void main(String[] args) {
  // Strips off hyphens
  final Properties opts = StringUtils.argsToProperties(args, optionArgDefs());
  final boolean showUsage = args.length == 0 || opts.containsKey("help");
  if (showUsage) {
    System.err.println(usage(FrenchPostprocessor.class.getName()));
    System.exit(-1);
  }

  final int threadCount = PropertiesUtils.getInt(opts, "nthreads", 1);
  final FrenchPreprocessor pre = new FrenchPreprocessor();
  final FrenchPostprocessor post = new FrenchPostprocessor(opts);

  CRFPostprocessor.setup(post, pre, opts);
  CRFPostprocessor.execute(threadCount, pre, post);
}
 
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:21,代码来源:FrenchPostprocessor.java

示例12: main

import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Command-line entry point for training and evaluating the English postprocessor.
 *
 * <p>Prints the usage text and exits with status -1 when no arguments are given
 * or {@code help} is requested. The thread count is read from the
 * {@code nthreads} option and defaults to 1.
 *
 * @param args command-line options
 */
public static void main(String[] args) {
  // Strips off hyphens
  final Properties opts = StringUtils.argsToProperties(args, optionArgDefs());
  final boolean showUsage = args.length == 0 || opts.containsKey("help");
  if (showUsage) {
    System.err.println(usage(EnglishPostprocessor.class.getName()));
    System.exit(-1);
  }

  final int threadCount = PropertiesUtils.getInt(opts, "nthreads", 1);
  final EnglishPreprocessor pre = new EnglishPreprocessor();
  final EnglishPostprocessor post = new EnglishPostprocessor(opts);

  CRFPostprocessor.setup(post, pre, opts);
  CRFPostprocessor.execute(threadCount, pre, post);
}
 
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:21,代码来源:EnglishPostprocessor.java

示例13: main

import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Command-line entry point for training and evaluating the Spanish postprocessor.
 *
 * <p>Prints the usage text and exits with status -1 when no arguments are given
 * or {@code help} is requested. The thread count is read from the
 * {@code nthreads} option and defaults to 1.
 *
 * @param args command-line options
 */
public static void main(String[] args) {
  // Strips off hyphens
  final Properties opts = StringUtils.argsToProperties(args, optionArgDefs());
  final boolean showUsage = args.length == 0 || opts.containsKey("help");
  if (showUsage) {
    System.err.println(usage(SpanishPostprocessor.class.getName()));
    System.exit(-1);
  }

  final int threadCount = PropertiesUtils.getInt(opts, "nthreads", 1);
  final SpanishPreprocessor pre = new SpanishPreprocessor();
  final SpanishPostprocessor post = new SpanishPostprocessor(opts);

  CRFPostprocessor.setup(post, pre, opts);
  CRFPostprocessor.execute(threadCount, pre, post);
}
 
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:21,代码来源:SpanishPostprocessor.java

示例14: srcCompoundCnt

import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Sums joint lexical counts between fragments of the source token and the
 * target token over all translation models.
 *
 * <p>For every length {@code k} from 4 up to {@code srcToken.length() - 4}, the
 * fragments considered are the prefix of length {@code k}, the suffix of length
 * {@code k}, and the capitalized suffix. The capitalized suffix is skipped when
 * capitalization leaves the suffix unchanged, so the same fragment is not
 * counted twice. Fragments rejected by {@code srcGarbageCollection} contribute
 * nothing.
 *
 * @param srcToken the source token to split into fragments
 * @param tgtToken the target token the fragments are paired with
 * @param tmList   the translation models queried for joint counts
 * @return the accumulated joint count; 0 when the token has fewer than 8 characters
 */
private static <TK,FV> int srcCompoundCnt(IString srcToken, IString tgtToken, List<DynamicTranslationModel<FV>> tmList) {
  final int srcSize = srcToken.length();
  int cnt = 0;
  // each compound should have at least 4 characters
  for (int k = 4; k < srcSize - 3; ++k) {
    final String suffix = srcToken.subSequence(srcSize - k, srcSize).toString();
    final String capitalizedSuffix = StringUtils.capitalize(suffix);

    final IString[] preSuffixes = new IString[3];
    preSuffixes[0] = new IString(srcToken.subSequence(0, k).toString()); // prefix
    preSuffixes[1] = new IString(suffix); // suffix
    preSuffixes[2] = new IString(capitalizedSuffix); // capitalized suffix

    // Compare by value: '==' on two freshly constructed objects is never true,
    // which made an already-capitalized suffix count twice.
    final int mMax = suffix.equals(capitalizedSuffix) ? 2 : 3;

    for (int m = 0; m < mMax; ++m) {
      final IString src = preSuffixes[m];
      if (!srcGarbageCollection(src, tmList)) {
        cnt += tmList.stream().mapToInt(tm -> tm.getJointLexCount(src, tgtToken)).sum();
      }
    }
  }
  return cnt;
}
 
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:22,代码来源:SyntheticRules.java

示例15: tgtCompoundCnt

import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Sums joint lexical counts between the source token and fragments of the
 * target token over all translation models.
 *
 * <p>For every length {@code k} from 4 up to {@code tgtToken.length() - 4}, the
 * fragments considered are the prefix of length {@code k}, the suffix of length
 * {@code k}, and the capitalized suffix. The capitalized suffix is skipped when
 * capitalization leaves the suffix unchanged, so the same fragment is not
 * counted twice. Fragments rejected by {@code tgtGarbageCollection} contribute
 * nothing.
 *
 * @param srcToken the source token the fragments are paired with
 * @param tgtToken the target token to split into fragments
 * @param tmList   the translation models queried for joint counts
 * @return the accumulated joint count; 0 when the token has fewer than 8 characters
 */
private static <TK,FV> int tgtCompoundCnt(IString srcToken, IString tgtToken, List<DynamicTranslationModel<FV>> tmList) {
  final int tgtSize = tgtToken.length();
  int cnt = 0;
  // each compound should have at least 4 characters
  for (int k = 4; k < tgtSize - 3; ++k) {
    final String suffix = tgtToken.subSequence(tgtSize - k, tgtSize).toString();
    final String capitalizedSuffix = StringUtils.capitalize(suffix);

    final IString[] preSuffixes = new IString[3];
    preSuffixes[0] = new IString(tgtToken.subSequence(0, k).toString()); // prefix
    preSuffixes[1] = new IString(suffix); // suffix
    preSuffixes[2] = new IString(capitalizedSuffix); // capitalized suffix

    // Compare by value: '==' on two freshly constructed objects is never true,
    // which made an already-capitalized suffix count twice.
    final int mMax = suffix.equals(capitalizedSuffix) ? 2 : 3;

    for (int m = 0; m < mMax; ++m) {
      final IString tgt = preSuffixes[m];
      if (!tgtGarbageCollection(tgt, tmList)) {
        cnt += tmList.stream().mapToInt(tm -> tm.getJointLexCount(srcToken, tgt)).sum();
      }
    }
  }
  return cnt;
}
 
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:22,代码来源:SyntheticRules.java


注:本文中的edu.stanford.nlp.util.StringUtils类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。