This article collects typical usage examples of the Java class edu.stanford.nlp.ling.HasWord. If you are wondering what the HasWord class does, how to use it, or what it looks like in practice, the selected code examples below may help.
The HasWord class belongs to the edu.stanford.nlp.ling package. Fifteen code examples of the class are shown below, sorted by popularity by default.
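Before the examples, a minimal sketch of how HasWord tokens are typically obtained and read: MaxentTagger.tokenizeText turns raw text into sentences of HasWord tokens, and word() returns each token's surface form. The sample sentence below is only an illustration.
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;

import java.io.StringReader;
import java.util.List;

public class HasWordSketch {
    public static void main(String[] args) {
        // Tokenize raw text; each sentence becomes a List<HasWord>.
        List<List<HasWord>> sentences =
                MaxentTagger.tokenizeText(new StringReader("Stanford NLP tags words. It also parses them."));
        for (List<HasWord> sentence : sentences) {
            for (HasWord token : sentence) {
                System.out.print(token.word() + " "); // word() returns the token's surface form
            }
            System.out.println();
        }
    }
}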
Example 1: doRun
import edu.stanford.nlp.ling.HasWord; // import the required package/class
@Override
protected List<Word> doRun(Language language, String sentence) {
MaxentTagger tagger = taggers.computeIfAbsent(language, lang -> {
if (lang == EN) {
return new MaxentTagger("edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
}
throw new UnsupportedLanguageException(lang);
});
PartOfSpeechSet partOfSpeechSet = PartOfSpeechSet.getPOSSet(language);
List<Word> words = new ArrayList<>();
List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new StringReader(sentence));
sentences.forEach(s -> {
tagger.tagSentence(s).forEach(taggedWord ->
words.add(new Word(partOfSpeechSet.valueOf(taggedWord.tag()), taggedWord.value())));
});
return words;
}
Example 2: demoDP
import edu.stanford.nlp.ling.HasWord; // import the required package/class
/**
* demoDP demonstrates turning a file into tokens and then parse trees. Note
* that the trees are printed by calling pennPrint on the Tree object. It is
* also possible to pass a PrintWriter to pennPrint if you want to capture
* the output.
*
* file => tokens => parse trees
*/
public static void demoDP(LexicalizedParser lp, String filename) {
// This option shows loading, sentence-segmenting and tokenizing
// a file using DocumentPreprocessor.
TreebankLanguagePack tlp = new PennTreebankLanguagePack();
GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
// You could also create a tokenizer here (as below) and pass it
// to DocumentPreprocessor
for (List<HasWord> sentence : new DocumentPreprocessor(filename)) {
Tree parse = lp.apply(sentence);
parse.pennPrint();
System.out.println();
GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
Collection tdl = gs.typedDependenciesCCprocessed();
System.out.println(tdl);
System.out.println();
}
}
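A hedged usage sketch for demoDP, assuming this main method lives in the same class and that LexicalizedParser is imported; the englishPCFG model path and the input file name are assumptions, not part of the original example.
public static void main(String[] args) {
    // Assumed model path and input file; adjust to your setup.
    LexicalizedParser lp =
            LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    demoDP(lp, "sample.txt"); // sample.txt: any plain-text file to segment and parse
}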
Example 3: wordIsNONAdjective
import edu.stanford.nlp.ling.HasWord; // import the required package/class
public boolean wordIsNONAdjective(String word) {
if (word.endsWith("ic") || word.endsWith("ical"))
return false;
List<HasWord> sentence = MaxentTagger.tokenizeText(new StringReader(word)).get(0);
TaggedWord taggedWord = tagger.tagSentence(sentence).get(0);
// if (taggedWord.tag().equals("NN") || taggedWord.tag().equals("NNS")
// || taggedWord.tag().equals("NNP")
// || taggedWord.tag().equals("NNPS"))
if (taggedWord.tag().equals("JJ"))
return false;
return true;
}
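wordIsNONAdjective relies on a tagger field that the snippet does not show. A plausible declaration, reusing the model path from Example 1 (an assumption here), might be:
// Assumed field; the original class creates its tagger elsewhere.
private final MaxentTagger tagger = new MaxentTagger(
        "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");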
Example 4: getIDFMapForDocument
import edu.stanford.nlp.ling.HasWord; // import the required package/class
/**
* Get an IDF map for the given document string.
*
* @param document
* @return
*/
private static Counter<String> getIDFMapForDocument(String document) {
// Clean up -- remove some Gigaword patterns that slow things down
// / don't help anything
document = headingSeparator.matcher(document).replaceAll("");
DocumentPreprocessor preprocessor = new DocumentPreprocessor(new StringReader(document));
preprocessor.setTokenizerFactory(tokenizerFactory);
Counter<String> idfMap = new ClassicCounter<String>();
for (List<HasWord> sentence : preprocessor) {
if (sentence.size() > MAX_SENTENCE_LENGTH)
continue;
List<TaggedWord> tagged = tagger.tagSentence(sentence);
for (TaggedWord w : tagged) {
if (w.tag().startsWith("n"))
idfMap.incrementCount(w.word());
}
}
return idfMap;
}
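getIDFMapForDocument relies on several statics that the snippet does not show (headingSeparator, tokenizerFactory, tagger, MAX_SENTENCE_LENGTH). One plausible set of declarations follows; the regex, the length limit, and the model path are placeholders, and imports for Pattern, Word, PTBTokenizer, TokenizerFactory, and MaxentTagger are assumed.
// Assumed declarations; the original class defines these elsewhere.
private static final int MAX_SENTENCE_LENGTH = 100;                  // placeholder limit
private static final Pattern headingSeparator = Pattern.compile("<[^>]+>"); // placeholder: strip SGML-style markup
private static final TokenizerFactory<Word> tokenizerFactory =
        PTBTokenizer.PTBTokenizerFactory.newWordTokenizerFactory(""); // default PTB options
private static final MaxentTagger tagger = new MaxentTagger(
        "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");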
Example 5: perClusterUpdateSen
import edu.stanford.nlp.ling.HasWord; // import the required package/class
public static void perClusterUpdateSen(ArrayList<List<HasWord>> processedText,
int common_sentNum, int representative_sentNum,
int coreStartIndex, int coreEndIndex,
int commonStartIndex, int commonEndIndex){
List<HasWord> representative_sentence =
processedText.get(representative_sentNum-1);
List<HasWord> common_sentence =
processedText.get(common_sentNum-1);
HasWord replace = new Word();
String replaceStr = "";
for (int i = coreStartIndex-1; i < coreEndIndex - 1; i++){
replaceStr += representative_sentence.get(i).toString();
replaceStr += " ";
}
replace.setWord(replaceStr.trim());
for (int i=commonStartIndex-1; i < commonEndIndex-1; i++){
common_sentence.set(i,new Word());
common_sentence.get(i).setWord("");
}
common_sentence.set(commonStartIndex-1, replace);
}
Example 6: DepTree
import edu.stanford.nlp.ling.HasWord; // import the required package/class
public DepTree(TypedDependency root, Collection<TypedDependency> tds,
List<? extends HasWord> sentence, int[] remapping, IntSet stack) {
this.map = new HashMap<>();
int t = root.dep().index();
node = sentence.get(t - 1).word();
//tag = root.dep().tag();
tag = root.dep().label().tag();
this.idx = remapping[t - 1];
if (!stack.contains(t)) {
IntSet stack2 = new IntRBTreeSet(stack);
stack2.add(t);
for (TypedDependency td : tds) {
if (td.gov().index() == t && td.dep().index() != t) {
map.put(td.reln().getShortName(), new DepTree(td, tds, sentence, remapping, stack2));
}
}
}
}
Example 7: remap
import edu.stanford.nlp.ling.HasWord; // import the required package/class
private static int[] remap(List<? extends HasWord> sentence, String[] s) {
int j = 0;
int[] map = new int[sentence.size()];
for (int i = 0; i < sentence.size(); i++) {
map[i] = j;
int k = j + 1;
for (; k < s.length; k++) {
if (i + 1 < sentence.size() && sentence.get(i + 1).word().equals(s[k])) {
break;
}
}
if (k < s.length) {
j = k;
}
}
return map;
}
Example 8: tag
import edu.stanford.nlp.ling.HasWord; // import the required package/class
public Sentence tag(Sentence sent) {
List<HasWord> ss = new ArrayList<HasWord>();
for (Token t : sent) {
HasWord hw = new Word();
hw.setWord(t.toString());
ss.add(hw);
}
List<TaggedWord> sst = tagger.tagSentence(ss);
for (tuple2<Integer,TaggedWord> item : x.enumerate(sst)) {
Token tk = sent.get(item.key);
tk.annotate("pos", item.value.tag());
sent.setAt(item.key).value(tk);
}
return sent;
}
Example 9: applyPTBTokenizer
import edu.stanford.nlp.ling.HasWord; // import the required package/class
private static List<String> applyPTBTokenizer(DocumentPreprocessor dp, boolean tokenizeNLs, boolean ptb3Escaping) {
PTBTokenizerFactory<Word> tf = PTBTokenizer.PTBTokenizerFactory.newWordTokenizerFactory("tokenizeNLs=" + tokenizeNLs + ",ptb3Escaping=" + ptb3Escaping + ",asciiQuotes=true");
dp.setTokenizerFactory(tf);
List<String> sentences = new ArrayList<>();
for (List<HasWord> wordList : dp) {
String sentence = "";
for (HasWord word : wordList) {
sentence += " " + splitCompounds(word.word());
}
sentences.add(sentence);
}
return sentences;
}
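A usage sketch for applyPTBTokenizer, assuming it is called from the same class and that DocumentPreprocessor is imported; the input file name is an assumption, and splitCompounds is a helper of the original class that is not shown here.
// Assumed usage; DocumentPreprocessor is edu.stanford.nlp.process.DocumentPreprocessor.
DocumentPreprocessor dp = new DocumentPreprocessor("sample.txt"); // placeholder input file
List<String> sentences = applyPTBTokenizer(dp, false, true);      // no newline tokens, PTB3 escaping on
for (String s : sentences) {
    System.out.println(s.trim());
}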
Example 10: analyzeSentences
import edu.stanford.nlp.ling.HasWord; // import the required package/class
/**
* @desc Splits strings into sentences. Prints one per line.
* @param paragraphs List of strings of sentences
*/
private void analyzeSentences(List<String> paragraphs) {
// Paragraphs may be multiple sentences
for( String fragment : paragraphs ) {
String lowered = fragment.toLowerCase();
// Don't process MUC document headers.
if( lowered.startsWith("dev-muc") || (lowered.startsWith("tst") && lowered.contains("-muc")) ) {
System.out.println("\n" + fragment + "\n");
}
else {
// Split and loop over sentences.
for( List<HasWord> sentence : Ling.getSentencesFromText(fragment) ) {
int i = 0;
for( HasWord token : sentence ) {
if( i++ > 0 ) System.out.print(" ");
System.out.print(normalizeJavaNLPToken(token));
}
System.out.println();
}
}
}
}
Example 11: tokenize
import edu.stanford.nlp.ling.HasWord; // import the required package/class
public ListMatrix<ListMatrix<String>> tokenize(String input) throws Exception {
ListMatrix<ListMatrix<String>> result = new DefaultListMatrix<ListMatrix<String>>();
StringReader sr = new StringReader(input);
List<List<HasWord>> sentences = MaxentTagger.tokenizeText(sr);
for (List<HasWord> tokSentence : sentences) {
ListMatrix<String> m = new DefaultListMatrix<String>();
for (HasWord t : tokSentence) {
m.add(t.word());
}
result.add(m);
}
return result;
}
Example 12: splitSentencesINDocument
import edu.stanford.nlp.ling.HasWord; // import the required package/class
public List<String> splitSentencesINDocument(String sDoc)
{
Reader reader = new StringReader(sDoc);
DocumentPreprocessor dp = new DocumentPreprocessor(reader);
List<String> sentenceList = new ArrayList<String>();
Iterator<List<HasWord>> it = dp.iterator();
while (it.hasNext())
{
StringBuilder sentenceSb = new StringBuilder();
List<HasWord> sentence = it.next();
for (HasWord token : sentence)
{
if(sentenceSb.length()>0) // add a separator before every token after the first
{
sentenceSb.append(" ");
}
sentenceSb.append(token);
}
sentenceList.add(sentenceSb.toString().trim());
}
return sentenceList;
}
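A minimal usage sketch, assuming it is called on an instance of the enclosing class; the input text is only an illustration.
List<String> sentences = splitSentencesINDocument(
        "Stanford NLP splits text into sentences. Each sentence becomes one string.");
for (String s : sentences) {
    System.out.println(s);
}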
Example 13: main
import edu.stanford.nlp.ling.HasWord; // import the required package/class
/**
* Main function
*
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception {
// data input
String text = "John loves Mary. She loves him too.";
// model loading
StanfordNlpWrapper nlp = new StanfordNlpWrapper(Env.STANFORDNLP_CFG);
nlp.loadPosTagger();
// task run
for (List<HasWord> words : StanfordNlpWrapper.detect(text)) {
ArrayList<String> strs = new ArrayList<String>();
for (TaggedWord taggedWord : nlp.tag(words))
strs.add(String.format("%s/%s", taggedWord.word(), taggedWord.tag()));
System.out.println(String.join(" ", strs));
}
}
Example 14: testStanfordNlpWrapperForBasic
import edu.stanford.nlp.ling.HasWord; // import the required package/class
/**
* StanfordNlpWrapper Test for basic functions
*
* @throws IOException
* @throws SAXException
* @throws ParserConfigurationException
* @throws XPathExpressionException
*/
public void testStanfordNlpWrapperForBasic() throws IOException, ParserConfigurationException, SAXException, XPathExpressionException {
System.out.println("\n----- testStanfordNlpWrapperForBasic() ------------------------------");
if (!TEST_BASIC)
return;
String text = "Samsung Electronics is a South Korean multinational electronics company headquartered in Suwon, South Korea.";
text += " It is the flagship subsidiary of the Samsung Group.";
StanfordNlpWrapper nlp = new StanfordNlpWrapper(Env.STANFORDNLP_CFG);
nlp.loadPosTagger();
nlp.loadLexParser();
assertTrue(nlp.tagger != null);
assertTrue(nlp.parser != null);
assertEquals(2, StanfordNlpWrapper.detect(text).size());
for (List<HasWord> words : StanfordNlpWrapper.detect(text)) {
System.out.println("\n[Sentence] " + JString.join(" ", words));
assertEquals(words.size(), nlp.tag(words).size());
System.out.println(" <Tagged> " + JString.join(" ", nlp.tag(words)));
JXml xml = new JXml(StanfordNlpWrapper.toTreeString(nlp.parse(words), "xmlTree"));
assertEquals(1, xml.findElements("ROOT/S/VP").size());
System.out.println(" <Parsed> " + StanfordNlpWrapper.toTreeString(nlp.parse(words), "oneline"));
}
}
Example 15: initTreeHelper
import edu.stanford.nlp.ling.HasWord; // import the required package/class
public void initTreeHelper(String sentence) {
Tokenizer<? extends HasWord> toke = tlp.getTokenizerFactory()
.getTokenizer(new StringReader(sentence));
List<? extends HasWord> sentence_token = toke.tokenize();
this.parse = lp.apply(sentence_token);
// stanford pst
StringBuilder sb = new StringBuilder();
this.parse.toStringBuilder(sb);
// System.out.println("PST:\n " + sb.toString());
// dependency tree
GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
this.tdl = gs.typedDependencies();
// System.out.println("DT:\n " + tdl);
}
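initTreeHelper depends on lp, tlp, gsf, parse, and tdl fields that the snippet does not show. One plausible set of declarations, where the parser model path is an assumption:
// Assumed fields; the original class initializes these elsewhere.
private final LexicalizedParser lp =
        LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
private final TreebankLanguagePack tlp = new PennTreebankLanguagePack();
private final GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
private Tree parse;                        // filled in by initTreeHelper
private Collection<TypedDependency> tdl;   // filled in by initTreeHelper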