This article collects typical usage examples of the Java class edu.stanford.nlp.ling.TaggedWord. If you are unsure what TaggedWord is for, or how to use it, the curated examples below should help.
The TaggedWord class belongs to the edu.stanford.nlp.ling package. Fourteen code examples are shown below, ordered by popularity.
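Before the examples, here is a minimal sketch of the core TaggedWord API that all of them rely on: a word paired with a part-of-speech tag, plus accessors and a mutator (the word and tags below are illustrative):

    import edu.stanford.nlp.ling.TaggedWord;

    public class TaggedWordBasics {
        public static void main(String[] args) {
            TaggedWord tw = new TaggedWord("dog", "NN"); // word plus POS tag
            System.out.println(tw.word()); // "dog"
            System.out.println(tw.tag());  // "NN"
            tw.setTag("NNS");              // tags are mutable
            System.out.println(tw);        // toString() renders "dog/NNS"
        }
    }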
Example 1: wordIsNONAdjective
import edu.stanford.nlp.ling.TaggedWord; // import the required package/class
public boolean wordIsNONAdjective(String word) {
    // Words ending in -ic/-ical are assumed to be adjectives.
    if (word.endsWith("ic") || word.endsWith("ical"))
        return false;
    // Tag the single word and check whether the tagger calls it an adjective.
    List<HasWord> sentence = MaxentTagger.tokenizeText(new StringReader(word)).get(0);
    TaggedWord taggedWord = tagger.tagSentence(sentence).get(0);
    // if (taggedWord.tag().equals("NN") || taggedWord.tag().equals("NNS")
    //         || taggedWord.tag().equals("NNP")
    //         || taggedWord.tag().equals("NNPS"))
    if (taggedWord.tag().equals("JJ"))
        return false;
    return true;
}
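The tagger field is not shown in the snippet above. A minimal setup sketch, assuming it is a MaxentTagger (the model path below is the stock English model shipped with the Stanford POS Tagger and varies across releases):

    import edu.stanford.nlp.tagger.maxent.MaxentTagger;

    // Assumed initialization of the tagger field used above; adjust the model
    // path to match the Stanford POS Tagger release on your classpath.
    MaxentTagger tagger = new MaxentTagger(
            "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");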
Example 2: getIDFMapForDocument
import edu.stanford.nlp.ling.TaggedWord; // import the required package/class
/**
 * Get an IDF map for the given document string.
 *
 * @param document
 * @return
 */
private static Counter<String> getIDFMapForDocument(String document) {
    // Clean up -- remove some Gigaword patterns that slow things down
    // and don't help anything
    document = headingSeparator.matcher(document).replaceAll("");
    DocumentPreprocessor preprocessor = new DocumentPreprocessor(new StringReader(document));
    preprocessor.setTokenizerFactory(tokenizerFactory);
    Counter<String> idfMap = new ClassicCounter<String>();
    for (List<HasWord> sentence : preprocessor) {
        if (sentence.size() > MAX_SENTENCE_LENGTH)
            continue;
        List<TaggedWord> tagged = tagger.tagSentence(sentence);
        for (TaggedWord w : tagged) {
            // Count each noun occurrence (note the lowercase tag prefix check).
            if (w.tag().startsWith("n"))
                idfMap.incrementCount(w.word());
        }
    }
    return idfMap;
}
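Despite the name, the method returns raw per-term document-frequency counts rather than final IDF weights. A hedged follow-up sketch that converts them (numDocs and the smoothing formula are assumptions, not from the source):

    import edu.stanford.nlp.stats.ClassicCounter;
    import edu.stanford.nlp.stats.Counter;

    // Assumed conversion from per-term document frequencies to IDF scores.
    static Counter<String> toIdf(Counter<String> docFreqs, int numDocs) {
        Counter<String> idf = new ClassicCounter<String>();
        for (String term : docFreqs.keySet()) {
            idf.setCount(term, Math.log(numDocs / (1.0 + docFreqs.getCount(term))));
        }
        return idf;
    }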
Example 3: parse
import edu.stanford.nlp.ling.TaggedWord; // import the required package/class
private Vector<String> parse(ArrayList<TaggedWord> taggedWords) {
    Vector<ArrayList<TaggedWord>> sentences = findAppositives(taggedWords);
    Vector<String> patterns = new Vector<String>();
    for (Iterator<ArrayList<TaggedWord>> iterator = sentences.iterator(); iterator.hasNext(); ) {
        Vector<String> pattern = null;
        taggedWords = iterator.next();
        IntegerMangi index = new IntegerMangi(0);
        // Try patterns A, B, and C in turn; advance the cursor when none match.
        while (index.get() < taggedWords.size()) {
            pattern = A(taggedWords, index);
            if (pattern == null)
                pattern = B(taggedWords, index);
            if (pattern == null)
                pattern = C(taggedWords, index);
            if (pattern != null)
                patterns.addAll(pattern);
            else
                index.incr();
        }
    }
    return patterns;
}
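IntegerMangi is project-specific and not shown on this page. Judging from the calls in Examples 3 and 4 (get, set, incr), a minimal sketch would be a mutable int wrapper like the following (the implementation is assumed):

    // Assumed sketch of the project-specific IntegerMangi helper: a mutable
    // cursor into the tagged-word list that pattern methods advance or rewind.
    class IntegerMangi {
        private int value;

        IntegerMangi(int value) { this.value = value; }

        int get() { return value; }

        void set(int value) { this.value = value; }

        void incr() { value++; }
    }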
Example 4: C
import edu.stanford.nlp.ling.TaggedWord; // import the required package/class
private Vector<String> C(ArrayList<TaggedWord> sentence, IntegerMangi index) {
    normalization_V = new int[2];
    normalization_NF = new int[2];
    normalization_ADJF = new Vector<int[]>();
    int startIndex = index.get();
    normalization_V[0] = startIndex;
    // Pattern C: a verb, followed by attribute parts, followed by a noun fragment.
    if (match(sentence, index, "V")) {
        normalization_V[1] = index.get();
        if (ATTF(sentence, index)) {
            normalization_NF[0] = index.get();
            if (NF(sentence, index)) {
                normalization_NF[1] = index.get();
                return TaggedWordsArrayToString(sentence, "C");
            }
        }
    }
    // No match: rewind the cursor to where this attempt started.
    index.set(startIndex);
    return null;
}
Example 5: tag
import edu.stanford.nlp.ling.TaggedWord; // import the required package/class
public Sentence tag(Sentence sent) {
    // Wrap each token as a HasWord for the Stanford tagger.
    List<HasWord> ss = new ArrayList<HasWord>();
    for (Token t : sent) {
        HasWord hw = new Word();
        hw.setWord(t.toString());
        ss.add(hw);
    }
    List<TaggedWord> sst = tagger.tagSentence(ss);
    // Copy each predicted tag back onto the corresponding token.
    for (int i = 0; i < sst.size(); i++) {
        Token tk = sent.get(i);
        tk.annotate("pos", sst.get(i).tag());
        sent.setAt(i).value(tk);
    }
    return sent;
}
Example 6: haveEquivalentPOSTags
import edu.stanford.nlp.ling.TaggedWord; // import the required package/class
/**
 * Check whether the given tokens carry comparable POS tags.
 */
private Boolean haveEquivalentPOSTags(TaggedWord firstToken, TaggedWord secondToken) {
    if (firstToken != null && secondToken != null && firstToken.tag() != null && secondToken.tag() != null) {
        if (firstToken.tag().toUpperCase().startsWith("VB") && secondToken.tag().toUpperCase().startsWith("VB")) {
            return true;
        } else if (firstToken.tag().toUpperCase().equals("MD") && secondToken.tag().toUpperCase().equals("MD")) {
            return true;
        } else if (firstToken.tag().toUpperCase().startsWith("NN") && secondToken.tag().toUpperCase().startsWith("NN")) {
            return true;
        } else if (firstToken.tag().toUpperCase().startsWith("RB") && secondToken.tag().toUpperCase().startsWith("RB")) {
            return true;
        } else if (firstToken.tag().toUpperCase().equals("WRB") && secondToken.tag().toUpperCase().equals("WRB")) {
            return true;
        } else if (firstToken.tag().toUpperCase().startsWith("JJ") && secondToken.tag().toUpperCase().startsWith("JJ")) {
            return true;
        }
    }
    return false;
}
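A quick illustrative call (the tokens and tags are made up for the example):

    TaggedWord first = new TaggedWord("runs", "VBZ");
    TaggedWord second = new TaggedWord("ran", "VBD");
    boolean equivalent = haveEquivalentPOSTags(first, second); // true: both tags start with "VB"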
Example 7: main
import edu.stanford.nlp.ling.TaggedWord; // import the required package/class
/**
 * Main function
 *
 * @param args
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
    // data input
    String text = "John loves Mary. She loves him too.";
    // model loading
    StanfordNlpWrapper nlp = new StanfordNlpWrapper(Env.STANFORDNLP_CFG);
    nlp.loadPosTagger();
    // task run
    for (List<HasWord> words : StanfordNlpWrapper.detect(text)) {
        ArrayList<String> strs = new ArrayList<String>();
        for (TaggedWord taggedWord : nlp.tag(words))
            strs.add(String.format("%s/%s", taggedWord.word(), taggedWord.tag()));
        System.out.println(String.join(" ", strs));
    }
}
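With a typical English model, the first printed line would look roughly like John/NNP loves/VBZ Mary/NNP ./. — the exact tags depend on the model that StanfordNlpWrapper loads.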
Example 8: processSentence
import edu.stanford.nlp.ling.TaggedWord; // import the required package/class
public List<WordLemmaTag> processSentence(String sentence, boolean isTokenized) {
    final StanfordLemmatizer lemmatizer = StanfordLemmatizer.getInstance();
    final StanfordPOSTagger tagger = StanfordPOSTagger.getInstance();
    final List<WordLemmaTag> tlSentence = new ArrayList<WordLemmaTag>();
    // the tagged sentence
    List<TaggedWord> tSentence = null;
    if (isTokenized) {
        tSentence = tagger.tag(sentence);
    } else {
        StanfordTokenizer tokenizer = StanfordTokenizer.getInstance();
        List<Word> tokens = tokenizer.tokenize(sentence);
        tSentence = tagger.tag(tokens);
    }
    // add to the lemmatized sentence
    for (TaggedWord tw : tSentence)
        tlSentence.add(lemmatizer.lemmatize(tw));
    return tlSentence;
}
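Each WordLemmaTag bundles the surface form, its lemma, and the POS tag. An illustrative call (the input sentence is made up, and the wrapper singletons are assumed to be configured):

    // Print one word/lemma/tag triple per line.
    for (WordLemmaTag wlt : processSentence("The cats were sleeping.", false)) {
        System.out.printf("%s\t%s\t%s%n", wlt.word(), wlt.lemma(), wlt.tag());
    }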
Example 9: generateFeatureVectorFromTaggedWords
import edu.stanford.nlp.ling.TaggedWord; // import the required package/class
@Override
public Map<Integer, Double> generateFeatureVectorFromTaggedWords(
        List<TaggedWord> tweet) {
    if (!m_useTaggedWords) {
        throw new RuntimeException(
                "Use TaggedWords was set to false! generateFeatureVectorFromTaggedWords is not applicable!");
    }
    // Merge the feature maps of the sentiment, POS, and TF-IDF generators.
    Map<Integer, Double> featureVector = m_sentimentFeatureVectorGenerator
            .generateFeatureVectorFromTaggedWords(tweet);
    featureVector.putAll(m_POSFeatureVectorGenerator
            .generateFeatureVectorFromTaggedWords(tweet));
    featureVector.putAll(m_tfidfFeatureVectorGenerator
            .generateFeatureVectorFromTaggedWords(tweet));
    return featureVector;
}
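Since Map.putAll overwrites entries whose keys collide, this merge implicitly assumes the three generators emit disjoint feature-index ranges.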
Example 10: pretagToken
import edu.stanford.nlp.ling.TaggedWord; // import the required package/class
private TaggedWord pretagToken(String token, boolean tokenIsHashTag,
        boolean tokenIsUser, boolean tokenIsURL) {
    TaggedWord preTaggedToken = new TaggedWord(token);
    if (tokenIsHashTag) {
        preTaggedToken.setTag("HT");
    } else if (tokenIsUser) {
        preTaggedToken.setTag("USR");
    } else if (tokenIsURL) {
        preTaggedToken.setTag("URL");
    } else if (StringUtils.isRetweet(token)) {
        preTaggedToken.setTag("RT");
    } else if (m_nameEntities.isNameEntity(token)) {
        if (LOGGING) {
            LOG.info("NameEntity labelled for " + token);
        }
        preTaggedToken.setTag("NNP");
    } else if ((m_interjections.isInterjection(token))
            || (StringUtils.isEmoticon(token))) {
        if (LOGGING) {
            LOG.info("Interjection or Emoticon labelled for " + token);
        }
        preTaggedToken.setTag("UH");
    }
    return preTaggedToken;
}
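Illustrative calls (the caller is assumed to have classified each token beforehand):

    TaggedWord hashtag = pretagToken("#java", true, false, false); // tagged "HT"
    TaggedWord mention = pretagToken("@user", false, true, false); // tagged "USR"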
Example 11: createFromTaggedWords
import edu.stanford.nlp.ling.TaggedWord; // import the required package/class
public static TweetTfIdf createFromTaggedWords(List<List<TaggedWord>> tweets,
        TfType type, TfIdfNormalization normalization, boolean usePOSTags) {
    TweetTfIdf tweetTfIdf = new TweetTfIdf(type, normalization, usePOSTags);
    tweetTfIdf.m_termFreqs = tfTaggedWordTweets(tweets, type, usePOSTags);
    tweetTfIdf.m_inverseDocFreq = idf(tweetTfIdf.m_termFreqs);
    // Assign each term a numeric id for feature-vector indexing.
    tweetTfIdf.m_termIds = new HashMap<String, Integer>();
    int i = 0;
    for (String key : tweetTfIdf.m_inverseDocFreq.keySet()) {
        tweetTfIdf.m_termIds.put(key, i);
        i++;
    }
    LOG.info("Found " + tweetTfIdf.m_inverseDocFreq.size() + " terms");
    // Debug
    // print("Term Frequency", m_termFreqs, m_inverseDocFreq);
    // print("Inverse Document Frequency", m_inverseDocFreq);
    return tweetTfIdf;
}
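Because HashMap iteration order is unspecified, the term-to-id assignment is arbitrary, though stable within a single run; a LinkedHashMap or a sorted key set would be needed for reproducible ids across runs.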
Example 12: rawParse
import edu.stanford.nlp.ling.TaggedWord; // import the required package/class
public ConcurrentDependencyGraph rawParse(List<TaggedWord> sentence)
        throws IOException, MaltChainedException {
    String[] conll = new String[sentence.size()];
    for (int i = 0; i < sentence.size(); i++) {
        TaggedWord taggedWord = sentence.get(i);
        String word = taggedWord.word();
        String lemma = "_";
        if (this.lemmatizer != null)
            lemma = this.lemmatizer.lemmatize(word);
        String pos = taggedWord.tag();
        conll[i] = String.format("%s\t%s\t%s\t%s\t%s\t%s",
                i + 1, word, lemma, pos, pos, "_");
    }
    return parse(conll);
}
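Each array entry becomes one tab-separated CoNLL-style row with the POS tag duplicated into both tag columns; for a token tagged NNS with a lemmatizer available, a row would look like 1 dogs dog NNS NNS _ (tab-separated; the token and lemma are illustrative).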
Example 13: joinVerbParts
import edu.stanford.nlp.ling.TaggedWord; // import the required package/class
/**
 * Join verb parts as in the Dadegan corpus.
 * Input:
 *   دیده/ADJ_INO
 *   شد/V_PA
 * Output:
 *   دیده شد/V_PA
 */
public static List<TaggedWord> joinVerbParts(List<TaggedWord> sentence) {
    Collections.reverse(sentence);
    List<TaggedWord> result = new ArrayList<>();
    TaggedWord beforeTaggedWord = new TaggedWord("", "");
    for (TaggedWord taggedWord : sentence) {
        if (PeykareReader.tokenizer.getBeforeVerbs().contains(taggedWord.word()) ||
                (PeykareReader.tokenizer.getAfterVerbs().contains(beforeTaggedWord.word()) &&
                 PeykareReader.tokenizer.getVerbs().contains(taggedWord.word()))) {
            // Merge this verb part into the verb token already emitted.
            beforeTaggedWord.setWord(taggedWord.word() + " " + beforeTaggedWord.word());
            if (result.isEmpty())
                result.add(beforeTaggedWord);
        } else {
            result.add(taggedWord);
            beforeTaggedWord = taggedWord;
        }
    }
    Collections.reverse(result);
    return result;
}
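The list is processed in reverse so that each preceding verb part (e.g. the participle in دیده شد) can be merged into the verb token that follows it in the original word order; the merged string is written back into the TaggedWord already stored in the result list.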
Example 14: posMapTest
import edu.stanford.nlp.ling.TaggedWord; // import the required package/class
@Test
public void posMapTest() throws IOException {
    BijankhanReader reader = new BijankhanReader(false);
    List<TaggedWord> expected = new ArrayList<>();
    expected.add(new TaggedWord("اولین", "ADJ"));
    expected.add(new TaggedWord("سیاره", "N"));
    expected.add(new TaggedWord("خارج", "ADJ"));
    expected.add(new TaggedWord("از", "PREP"));
    expected.add(new TaggedWord("منظومه", "N"));
    expected.add(new TaggedWord("شمسی", "ADJ"));
    expected.add(new TaggedWord("دیده", "ADJ"));
    expected.add(new TaggedWord("شد", "V"));
    expected.add(new TaggedWord(".", "PUNC"));
    Iterator<List<TaggedWord>> iter = reader.getSentences().iterator();
    List<TaggedWord> actual = iter.next();
    assertEquals("Failed to map pos of sentence", expected.size(), actual.size());
    for (int i = 0; i < expected.size(); i++) {
        TaggedWord actualTaggedWord = actual.get(i);
        TaggedWord expectedTaggedWord = expected.get(i);
        if (!actualTaggedWord.tag().equals(expectedTaggedWord.tag()))
            assertEquals("Failed to map pos of sentence", expectedTaggedWord, actualTaggedWord);
    }
}