本文整理汇总了Java中edu.stanford.nlp.util.StringUtils类的典型用法代码示例。如果您正苦于以下问题:Java StringUtils类的具体用法?Java StringUtils怎么用?Java StringUtils使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
StringUtils类属于edu.stanford.nlp.util包,在下文中一共展示了StringUtils类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: omitPunct
import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Returns {@code str} with punctuation characters removed, collapsing each
 * run of removed punctuation into a single space (unless a space was just
 * emitted, so no double spaces are produced and no leading space appears).
 *
 * @param str the input string; must be non-null
 * @return the input with punctuation stripped and normalized spacing
 */
public static String omitPunct(String str) {
  StringBuilder sb = new StringBuilder();
  // Start as true so a leading punctuation run emits no space.
  boolean lastIsSpace = true;
  for (int i = 0; i < str.length(); ++i) {
    char c = str.charAt(i);
    // String.valueOf avoids the deprecated Character boxing constructor
    // (new Character(c)) that the original used just to get a String.
    if (!StringUtils.isPunct(String.valueOf(c))) {
      sb.append(c);
      lastIsSpace = (c == ' ');
    } else if (!lastIsSpace) {
      // Replace the punctuation run with exactly one separator space.
      sb.append(' ');
      lastIsSpace = true;
    }
  }
  return sb.toString();
}
示例2: computeTopicSimilarity
import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Scores concept {@code c} against the description of {@code topic} under
 * every configured similarity measure.
 *
 * @param c     the concept to score
 * @param topic index into {@code topicDescriptions}
 * @return one (measure-name, score) pair per similarity measure
 */
private List<Pair<String, Double>> computeTopicSimilarity(Concept c, int topic) {
  // Lazily build the measure registry on first call.
  // NOTE(review): this lazy init is not thread-safe -- confirm single-threaded use.
  if (simMeasures == null) {
    simMeasures = new HashMap<>();
    simMeasures.put("topic_jaccard", new JaccardDistance());
    simMeasures.put("topic_wn", new WordBasedMeasure(WNSimMeasure.RES));
    simMeasures.put("topic_w2v", new WordEmbeddingDistance(EmbeddingType.WORD2VEC, 300, false));
  }
  // Turn the topic's word list into a preprocessed dummy Concept to compare against.
  String[] topicDesc = this.topicDescriptions.get(topic);
  Concept dummy = new Concept(StringUtils.join(topicDesc));
  dummy = NonUIMAPreprocessor.getInstance().preprocess(dummy);
  List<Pair<String, Double>> scores = new ArrayList<>();
  // forEach avoids the original keySet()-then-get() double lookup per entry.
  simMeasures.forEach((simName, measure) -> {
    double score = Muter.callMuted(measure::computeSimilarity, c, dummy);
    scores.add(new Pair<>(simName, score));
  });
  return scores;
}
示例3: main
import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * A debugging method to try relation extraction from the console.
 * Builds a full CoreNLP pipeline (through the kbp annotator), then reads
 * sentences from stdin in a prompt loop; for each input line it prints all
 * extracted KBP relation triples to stderr and the sentence to stdout.
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
Properties props = StringUtils.argsToProperties(args);
// Full annotator stack the 'kbp' annotator depends on (mentions + coref included).
props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
// Caseless + cased RegexNER mappings, restricted to NN/JJ POS tags, case-insensitive.
props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
// Blocks reading lines until EOF; each line is annotated independently.
IOUtils.console("sentence> ", line -> {
Annotation ann = new Annotation(line);
pipeline.annotate(ann);
for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
// Triples go to stderr, the sentence text to stdout (keeps streams separable).
sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(System.err::println);
System.out.println(sentence);
}
});
}
示例4: main
import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Demo entry point: runs CoreNLP plus a RelationExtractorAnnotator over a
 * hard-coded example sentence and prints every relation mention found.
 */
public static void main(String[] args){
try{
Properties props = StringUtils.argsToProperties(args);
// props.setProperty("annotators", "tokenize,ssplit,lemma,pos,parse,ner");
// NOTE(review): the pipeline below is built with the no-arg constructor, so
// the command-line props (and the commented-out annotators line above) are
// never applied to the pipeline -- only to the RelationExtractorAnnotator.
// Confirm the default pipeline produces the annotations the extractor needs.
StanfordCoreNLP pipeline = new StanfordCoreNLP();
String sentence = "John Gerspach was named Chief Financial Officer of Citi in July 2009.";
Annotation doc = new Annotation(sentence);
pipeline.annotate(doc);
RelationExtractorAnnotator r = new RelationExtractorAnnotator(props);
r.annotate(doc);
for(CoreMap s: doc.get(CoreAnnotations.SentencesAnnotation.class)){
System.out.println("For sentence " + s.get(CoreAnnotations.TextAnnotation.class));
List<RelationMention> rls = s.get(RelationMentionsAnnotation.class);
for(RelationMention rl: rls){
System.out.println(rl.toString());
}
}
}catch(Exception e){
// Demo-only: any failure is dumped to stderr and the program exits normally.
e.printStackTrace();
}
}
示例5: main
import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Console REPL for KBP relation extraction that only prints relations whose
 * gloss is in a fixed set of interesting types (titles, employment links).
 *
 * @throws IOException if reading from the console fails
 */
public static void main(String[] args) throws IOException {
Properties props = StringUtils.argsToProperties(args);
// Full annotator stack the 'kbp' annotator depends on (mentions + coref included).
props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
// Whitelist of relation glosses to report; all other triples are discarded.
Set<String> interested = Stream.of("per:title", "per:employee_of", "org:top_members/employees").collect(Collectors.toSet());
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
// Blocks reading lines until EOF; each line is annotated independently.
IOUtils.console("sentence> ", line -> {
Annotation ann = new Annotation(line);
pipeline.annotate(ann);
for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(r -> {
String relation = r.relationGloss();
if(interested.contains(relation)) {
// Matching triples are written to stderr.
System.err.println(r);
}
});
}
});
}
示例6: parse
import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Annotates {@code text} with the shared preprocessing pipeline, then for
 * every named-entity mention in every sentence prints the FIGER type
 * prediction for that mention span to stdout.
 *
 * @param sys    the FIGER typing system used to predict mention types
 * @param lineId input line number, echoed in the output for traceability
 * @param text   raw text to annotate
 */
public static void parse(FigerSystem sys, int lineId, String text) {
Annotation annotation = new Annotation(text);
Preprocessing.pipeline.annotate(annotation);
// for each sentence
int sentId = 0;
for (CoreMap sentence : annotation.get(SentencesAnnotation.class)) {
// System.out.println("[l" + i + "][s"
// + sentId + "]tokenized sentence="
// + StringUtils.joinWithOriginalWhiteSpace(sentence
// .get(TokensAnnotation.class)));
List<Pair<Integer, Integer>> entityMentionOffsets = getNamedEntityMentions(sentence);
for (Pair<Integer, Integer> offset : entityMentionOffsets) {
String label = sys.predict(annotation, sentId,
offset.first, offset.second);
// Mention text rebuilt with original whitespace from the token sub-list
// [offset.first, offset.second) of this sentence.
String mention = StringUtils.joinWithOriginalWhiteSpace(sentence.get(
TokensAnnotation.class).subList(offset.first, offset.second));
// NOTE(review): 'mention' appears twice in this line (before and after
// the offsets) -- looks like a copy/paste slip; confirm intended format
// before changing the output.
System.out.println("[l" + lineId + "][s" + sentId + "]mention"
+ mention + "(" + offset.first + ","
+ offset.second + ") = " + mention + ", pred = "
+ label);
}
sentId++;
}
}
示例7: parse
import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Annotates {@code text} with the shared preprocessing pipeline, then for
 * every named-entity mention in every sentence prints the system's type
 * prediction for that mention span to stdout.
 *
 * @param sys    the prediction system applied to each mention span
 * @param lineId input line number, echoed in the output for traceability
 * @param text   raw text to annotate
 */
public static void parse(ParseStanfordFigerReverb sys, int lineId, String text) {
Annotation annotation = new Annotation(text);
Preprocessing.pipeline.annotate(annotation);
// for each sentence
int sentId = 0;
for (CoreMap sentence : annotation.get(SentencesAnnotation.class)) {
// System.out.println("[l" + i + "][s"
// + sentId + "]tokenized sentence="
// + StringUtils.joinWithOriginalWhiteSpace(sentence
// .get(TokensAnnotation.class)));
List<Pair<Integer, Integer>> entityMentionOffsets = getNamedEntityMentions(sentence);
for (Pair<Integer, Integer> offset : entityMentionOffsets) {
String label = sys.predict(annotation, sentId, offset.first, offset.second);
// Mention text rebuilt with original whitespace from the token sub-list
// [offset.first, offset.second) of this sentence.
String mention = StringUtils.joinWithOriginalWhiteSpace(
sentence.get(TokensAnnotation.class).subList(offset.first, offset.second));
// NOTE(review): 'mention' appears twice in this line -- likely a
// copy/paste slip; confirm intended format before changing the output.
System.out.println("[l" + lineId + "][s" + sentId + "]mention" + mention + "(" + offset.first + ","
+ offset.second + ") = " + mention + ", pred = " + label);
}
sentId++;
}
}
示例8: nearestFile
import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Finds the file in {@code directory} whose (lower-cased) name has the
 * smallest edit distance to {@code name}, optionally excluding files that
 * contain {@code badSubstring}. Exits the JVM if the path is not a directory.
 *
 * @param name         target file name (compared case-insensitively)
 * @param directory    directory to scan
 * @param badSubstring if non-null, files containing this substring are skipped
 * @return the closest matching file name, or null if the directory is empty
 */
public static String nearestFile(String name, String directory, String badSubstring) {
  name = name.toLowerCase();
  File dir = new File(directory);
  // Guard clause: a non-directory path is a fatal configuration error.
  if (!dir.isDirectory()) {
    System.out.println("(Directory) Not a directory: " + dir);
    System.exit(-1);
    return null; // unreachable; keeps the compiler satisfied
  }
  float bestScore = Float.MAX_VALUE;
  String closest = null;
  for (String candidate : getFiles(dir)) {
    candidate = candidate.toLowerCase();
    // edit distance?
    float score = StringUtils.editDistance(name, candidate);
    // System.out.println("name=" + name + "\tsimilar file " + candidate + " score = " + score);
    boolean allowed = (badSubstring == null || !candidate.contains(badSubstring));
    if (allowed && score < bestScore) {
      bestScore = score;
      closest = candidate;
    }
  }
  return closest;
}
示例9: main
import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Run Phrasal from the command line.
 * Parses options, treats the single positional argument (stored under the
 * empty key) as the config file, loads the decoder from the merged
 * configuration, and decodes either the file named by -text or stdin.
 *
 * @param args command-line options; a bare argument is the config file path
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
final Properties options = StringUtils.argsToProperties(args);
// argsToProperties files positional (non-hyphen) args under the empty key.
final String configFile = options.containsKey("") ? (String) options.get("") : null;
options.remove("");
// No config and no options, or an explicit help flag: print usage and exit.
if ((options.size() == 0 && configFile == null) || options.containsKey("help") || options.containsKey("h")) {
System.err.println(usage());
System.exit(-1);
}
// by default, exit on uncaught exception
Thread.setDefaultUncaughtExceptionHandler((t, ex) -> {
logger.fatal("Uncaught top-level exception", ex);
System.exit(-1);
});
// Command-line options override/extend whatever the config file provides.
final Map<String, List<String>> configuration = getConfigurationFrom(configFile, options);
final Phrasal p = Phrasal.loadDecoder(configuration);
// -text <file> decodes that file; otherwise read source sentences from stdin.
if (options.containsKey("text")) p.decode(new FileInputStream(new File(options.getProperty("text"))), true);
else p.decode(System.in, true);
}
示例10: main
import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Entry point for training and evaluating the German postprocessor.
 * Parses command-line options, prints usage when asked (or when no
 * arguments are given), then wires the pre-/postprocessor pair into the
 * shared CRF driver.
 *
 * @param args command-line arguments (hyphen-prefixed options)
 */
public static void main(String[] args) {
  // argsToProperties strips the leading hyphens from option names.
  Properties opts = StringUtils.argsToProperties(args, optionArgDefs());
  if (args.length == 0 || opts.containsKey("help")) {
    System.err.println(usage(GermanPostprocessor.class.getName()));
    System.exit(-1);
  }
  final int numThreads = PropertiesUtils.getInt(opts, "nthreads", 1);
  final GermanPreprocessor pre = new GermanPreprocessor();
  final GermanPostprocessor post = new GermanPostprocessor(opts);
  CRFPostprocessor.setup(post, pre, opts);
  CRFPostprocessor.execute(numThreads, pre, post);
}
示例11: main
import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Entry point for training and evaluating the French postprocessor.
 * Parses command-line options, prints usage when asked (or when no
 * arguments are given), then wires the pre-/postprocessor pair into the
 * shared CRF driver.
 *
 * @param args command-line arguments (hyphen-prefixed options)
 */
public static void main(String[] args) {
  // argsToProperties strips the leading hyphens from option names.
  Properties opts = StringUtils.argsToProperties(args, optionArgDefs());
  if (args.length == 0 || opts.containsKey("help")) {
    System.err.println(usage(FrenchPostprocessor.class.getName()));
    System.exit(-1);
  }
  final int numThreads = PropertiesUtils.getInt(opts, "nthreads", 1);
  final FrenchPreprocessor pre = new FrenchPreprocessor();
  final FrenchPostprocessor post = new FrenchPostprocessor(opts);
  CRFPostprocessor.setup(post, pre, opts);
  CRFPostprocessor.execute(numThreads, pre, post);
}
示例12: main
import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Entry point for training and evaluating the English postprocessor.
 * Parses command-line options, prints usage when asked (or when no
 * arguments are given), then wires the pre-/postprocessor pair into the
 * shared CRF driver.
 *
 * @param args command-line arguments (hyphen-prefixed options)
 */
public static void main(String[] args) {
  // argsToProperties strips the leading hyphens from option names.
  Properties opts = StringUtils.argsToProperties(args, optionArgDefs());
  if (args.length == 0 || opts.containsKey("help")) {
    System.err.println(usage(EnglishPostprocessor.class.getName()));
    System.exit(-1);
  }
  final int numThreads = PropertiesUtils.getInt(opts, "nthreads", 1);
  final EnglishPreprocessor pre = new EnglishPreprocessor();
  final EnglishPostprocessor post = new EnglishPostprocessor(opts);
  CRFPostprocessor.setup(post, pre, opts);
  CRFPostprocessor.execute(numThreads, pre, post);
}
示例13: main
import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Entry point for training and evaluating the Spanish postprocessor.
 * Parses command-line options, prints usage when asked (or when no
 * arguments are given), then wires the pre-/postprocessor pair into the
 * shared CRF driver.
 *
 * @param args command-line arguments (hyphen-prefixed options)
 */
public static void main(String[] args) {
  // argsToProperties strips the leading hyphens from option names.
  Properties opts = StringUtils.argsToProperties(args, optionArgDefs());
  if (args.length == 0 || opts.containsKey("help")) {
    System.err.println(usage(SpanishPostprocessor.class.getName()));
    System.exit(-1);
  }
  final int numThreads = PropertiesUtils.getInt(opts, "nthreads", 1);
  final SpanishPreprocessor pre = new SpanishPreprocessor();
  final SpanishPostprocessor post = new SpanishPostprocessor(opts);
  CRFPostprocessor.setup(post, pre, opts);
  CRFPostprocessor.execute(numThreads, pre, post);
}
示例14: srcCompoundCnt
import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Sums joint lexical counts between candidate compound parts of the source
 * token and the target token, across all translation models. For each split
 * length k (4 .. length-4) the candidates are: the length-k prefix, the
 * length-k suffix, and the capitalized length-k suffix of the source token.
 *
 * @return the summed joint lexical counts over all accepted candidate parts
 */
private static <TK,FV> int srcCompoundCnt(IString srcToken, IString tgtToken, List<DynamicTranslationModel<FV>> tmList) {
int srcSize = srcToken.length();
int cnt = 0;
// each compound should have at least 4 characters
for(int k = 4; k < srcSize - 3 ; ++k) {
IString[] preSuffixes = new IString[3];
preSuffixes[0] = new IString(srcToken.subSequence(0, k).toString()); // prefix
preSuffixes[1] = new IString(srcToken.subSequence(srcSize - k , srcSize).toString()); // suffix
preSuffixes[2] = new IString(StringUtils.capitalize(srcToken.subSequence(srcSize - k, srcSize).toString())); // capitalized suffix
// NOTE(review): '==' compares IString references, not content; this only
// detects the suffix == capitalized-suffix duplicate if IStrings are
// interned -- confirm interning, otherwise this should be equals().
int mMax = preSuffixes[1] == preSuffixes[2] ? 2 : 3;
for(int m = 0; m < mMax; ++m) {
IString src = preSuffixes[m];
// Skip candidates rejected by srcGarbageCollection (defined elsewhere).
if(!srcGarbageCollection(src, tmList)) {
cnt += tmList.stream().mapToInt(tm -> tm.getJointLexCount(src, tgtToken)).sum();
}
}
}
return cnt;
}
示例15: tgtCompoundCnt
import edu.stanford.nlp.util.StringUtils; //导入依赖的package包/类
/**
 * Sums joint lexical counts between the source token and candidate compound
 * parts of the target token, across all translation models. For each split
 * length k (4 .. length-4) the candidates are: the length-k prefix, the
 * length-k suffix, and the capitalized length-k suffix of the target token.
 *
 * @return the summed joint lexical counts over all accepted candidate parts
 */
private static <TK,FV> int tgtCompoundCnt(IString srcToken, IString tgtToken, List<DynamicTranslationModel<FV>> tmList) {
int tgtSize = tgtToken.length();
int cnt = 0;
// each compound should have at least 4 characters
for(int k = 4; k < tgtSize - 3 ; ++k) {
IString[] preSuffixes = new IString[3];
preSuffixes[0] = new IString(tgtToken.subSequence(0, k).toString()); // prefix
preSuffixes[1] = new IString(tgtToken.subSequence(tgtSize - k , tgtSize).toString()); // suffix
preSuffixes[2] = new IString(StringUtils.capitalize(tgtToken.subSequence(tgtSize - k, tgtSize).toString())); // capitalized suffix
// NOTE(review): '==' compares IString references, not content; this only
// detects the suffix == capitalized-suffix duplicate if IStrings are
// interned -- confirm interning, otherwise this should be equals().
int mMax = preSuffixes[1] == preSuffixes[2] ? 2 : 3;
for(int m = 0; m < mMax; ++m) {
IString tgt = preSuffixes[m];
// Skip candidates rejected by tgtGarbageCollection (defined elsewhere).
if(!tgtGarbageCollection(tgt, tmList)) {
cnt += tmList.stream().mapToInt(tm -> tm.getJointLexCount(srcToken, tgt)).sum();
}
}
}
return cnt;
}