本文整理汇总了Java中de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token类的典型用法代码示例。如果您正苦于以下问题:Java Token类的具体用法?Java Token怎么用?Java Token使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
Token类属于de.tudarmstadt.ukp.dkpro.core.api.segmentation.type包,在下文中一共展示了Token类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: createAllFeatureExtractors
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入依赖的package包/类
/**
 * Builds the complete list of feature extractors used for classification. Kept public and
 * static so the identical configuration can be reused from the Features2Xml class without
 * duplicating code.
 *
 * @return the list of feature extractors
 * @throws IOException if one of the extractors fails to load its backing resource
 */
public static List<FeatureExtractor1<Token>> createAllFeatureExtractors() throws IOException {
    List<FeatureExtractor1<Token>> extractors = new ArrayList<>();
    // token-level extractors: stem/type-path, surface-form features, context window
    extractors.add(FeatureExtractorFactory.createTokenTypePathExtractors());
    extractors.add(FeatureExtractorFactory.createTokenFeatureExtractors());
    extractors.add(FeatureExtractorFactory.createTokenContextExtractors());
    // gazetteer-style list extractors (names, locations, organizations, ...)
    extractors.add(FeatureExtractorFactory.createNameListExtractors());
    extractors.add(FeatureExtractorFactory.createCityListExtractors());
    extractors.add(FeatureExtractorFactory.createCountryListExtractors());
    extractors.add(FeatureExtractorFactory.createMiscListExtractors());
    extractors.add(FeatureExtractorFactory.createOrgListExtractors());
    extractors.add(FeatureExtractorFactory.createLocListExtractors());
    return extractors;
}
示例2: createTokenFeatureExtractors
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入依赖的package包/类
/**
 * Creates the extractor that derives surface-form features from a single token.
 * <p>
 * Note the distinction between feature extractors and feature functions: extractors take an
 * Annotation from the JCas and produce features from it, while feature functions derive new
 * features from already-extracted ones. Since feature functions never have to consult the
 * JCas, they can be cheaper than running additional extractors; e.g. the
 * CharacterNgramFeatureFunction simply takes suffixes of the text produced by the
 * CoveredTextExtractor.
 */
public static FeatureExtractor1<Token> createTokenFeatureExtractors() {
    return new FeatureFunctionExtractor<>(
            // base extractor: the text covered by the token annotation
            new CoveredTextExtractor<Token>(),
            // lower-cased form of the word
            new LowerCaseFeatureFunction(),
            // capitalization type (all uppercase, all lowercase, ...)
            new CapitalTypeFeatureFunction(),
            // numeric type (numeric, alphanumeric, ...)
            new NumericTypeFeatureFunction(),
            // suffix bigram: last two characters of the word
            new CharacterNgramFeatureFunction(CharacterNgramFeatureFunction.Orientation.RIGHT_TO_LEFT, 0, 2),
            // suffix trigram: last three characters of the word
            new CharacterNgramFeatureFunction(CharacterNgramFeatureFunction.Orientation.RIGHT_TO_LEFT, 0, 3),
            // character-category pattern based on the Unicode categories of the token
            new CharacterCategoryPatternFunction());
}
示例3: initialize
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入依赖的package包/类
@SuppressWarnings("unchecked")
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);
    // instantiate and add feature extractors
    if (featureExtractionFile == null) {
        try {
            // no configuration file given: fall back to the default extractor set
            featureExtractors = FeatureExtractorFactory.createAllFeatureExtractors();
        } catch (IOException e) {
            // Fail initialization instead of swallowing the error (the old code called
            // e.printStackTrace(), which left featureExtractors null and caused an NPE
            // later during processing).
            throw new ResourceInitializationException(e);
        }
    } else {
        // load the extractor configuration from the given XML file via XStream
        XStream xstream = XStreamFactory.createXStream();
        featureExtractors = (List<FeatureExtractor1<Token>>) xstream.fromXML(new File(featureExtractionFile));
    }
}
示例4: argAnnotationBegins
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入依赖的package包/类
/**
 * Returns the argument component annotation that begins at this token, or {@code null} if
 * no covering argument component starts exactly at the token's begin offset.
 * <p>
 * NOTE(review): only the FIRST covering annotation found (collected in the order Claim,
 * Backing, Premise, Rebuttal, Refutation) is compared against the token's begin offset --
 * confirm this ordering is intended when several components cover the same token.
 *
 * @param t token
 * @param jCas jcas
 * @return the argument component starting at this token, or {@code null}
 */
public static ArgumentComponent argAnnotationBegins(Token t, JCas jCas)
{
List<ArgumentComponent> argumentAnnotations = new ArrayList<>();
argumentAnnotations
.addAll(JCasUtil.selectCovering(jCas, Claim.class, t.getBegin(), t.getEnd()));
argumentAnnotations
.addAll(JCasUtil.selectCovering(jCas, Backing.class, t.getBegin(), t.getEnd()));
argumentAnnotations
.addAll(JCasUtil.selectCovering(jCas, Premise.class, t.getBegin(), t.getEnd()));
argumentAnnotations
.addAll(JCasUtil.selectCovering(jCas, Rebuttal.class, t.getBegin(), t.getEnd()));
argumentAnnotations
.addAll(JCasUtil.selectCovering(jCas, Refutation.class, t.getBegin(), t.getEnd()));
// only the first covering component is tested for an exact begin-offset match
if (!argumentAnnotations.isEmpty() && argumentAnnotations.get(0).getBegin() == t
.getBegin()) {
return argumentAnnotations.get(0);
}
return null;
}
示例5: argAnnotationEnds
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入依赖的package包/类
/**
 * Checks whether an argument component annotation ends at this token.
 *
 * @param t token
 * @param jCas jcas
 * @return true if the first covering argument component ends exactly at this token's end
 *         offset
 */
public static boolean argAnnotationEnds(Token t, JCas jCas)
{
    int begin = t.getBegin();
    int end = t.getEnd();
    // collect all argument components covering the token, in a fixed type order
    List<ArgumentComponent> covering = new ArrayList<>();
    covering.addAll(JCasUtil.selectCovering(jCas, Claim.class, begin, end));
    covering.addAll(JCasUtil.selectCovering(jCas, Backing.class, begin, end));
    covering.addAll(JCasUtil.selectCovering(jCas, Premise.class, begin, end));
    covering.addAll(JCasUtil.selectCovering(jCas, Rebuttal.class, begin, end));
    covering.addAll(JCasUtil.selectCovering(jCas, Refutation.class, begin, end));
    if (covering.isEmpty()) {
        return false;
    }
    // only the first covering component is tested for an exact end-offset match
    return covering.get(0).getEnd() == end;
}
示例6: copyParagraphAndTokenAnnotations
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入依赖的package包/类
/**
 * Copies all Paragraph and Token annotations from one JCas to another. Only the begin/end
 * offsets are transferred; both views must contain exactly the same document text.
 *
 * @param source the JCas to read annotations from
 * @param target the JCas to create the copies in
 * @throws IllegalArgumentException if source and target document texts differ
 */
private static void copyParagraphAndTokenAnnotations(JCas source, JCas target)
{
    if (!source.getDocumentText().equals(target.getDocumentText())) {
        throw new IllegalArgumentException("Source and target have different content");
    }
    for (Paragraph original : JCasUtil.select(source, Paragraph.class)) {
        Paragraph copy = new Paragraph(target);
        copy.setBegin(original.getBegin());
        copy.setEnd(original.getEnd());
        copy.addToIndexes();
    }
    for (Token original : JCasUtil.select(source, Token.class)) {
        Token copy = new Token(target);
        copy.setBegin(original.getBegin());
        copy.setEnd(original.getEnd());
        copy.addToIndexes();
    }
}
示例7: keepArgument
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入依赖的package包/类
/**
 * Keeps the argument only if its number of out-of-vocabulary tokens does not exceed
 * THRESHOLD. The OOV count of every document is also recorded in the frequency
 * distribution.
 */
@Override
boolean keepArgument(JCas jCas)
{
    int outOfVocabulary = 0;
    for (Token token : JCasUtil.select(jCas, Token.class)) {
        if (!vocabulary.contains(token.getCoveredText())) {
            outOfVocabulary++;
        }
    }
    frequency.addValue(outOfVocabulary);
    return outOfVocabulary <= THRESHOLD;
}
示例8: findVerbs
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入依赖的package包/类
/**
 * For every ordered pair of concept annotations (left strictly before right) in the
 * sentence, collects the tokens lying strictly between them; if there are fewer than ten
 * such tokens and at least one has a POS tag starting with "V", a link between the two
 * concepts is added.
 */
private void findVerbs(JCas jcas, Sentence sentence) {
    List<CC> concepts = JCasUtil.selectCovered(jcas, CC.class, sentence);
    if (concepts.size() < 2) {
        return;
    }
    for (CC left : concepts) {
        for (CC right : concepts) {
            // only consider distinct pairs where 'left' ends before 'right' starts
            if (left == right || left.getEnd() >= right.getBegin()) {
                continue;
            }
            List<PToken> between = new ArrayList<PToken>();
            boolean verbSeen = false;
            for (Token tok : JCasUtil.selectCovered(Token.class, sentence)) {
                // keep only tokens strictly inside the gap between the two concepts
                if (tok.getBegin() <= left.getEnd() || tok.getEnd() >= right.getBegin()) {
                    continue;
                }
                between.add(this.parent.getToken(tok));
                if (tok.getPos().getPosValue().startsWith("V")) {
                    verbSeen = true;
                }
            }
            if (!between.isEmpty() && between.size() < 10 && verbSeen) {
                this.addLink(left, right, between);
            }
        }
    }
}
示例9: process
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入依赖的package包/类
/**
 * Creates one TextClassificationSequence per sentence and, for every token inside it, a
 * TextClassificationTarget (with running id and the token text as suffix) plus a
 * TextClassificationOutcome holding the gold label for that target.
 */
@Override
public void process(JCas aJCas)
    throws AnalysisEngineProcessException
{
    for (Sentence sentence : JCasUtil.select(aJCas, Sentence.class)) {
        TextClassificationSequence sequence = new TextClassificationSequence(aJCas,
                sentence.getBegin(), sentence.getEnd());
        sequence.addToIndexes();
        for (Token token : JCasUtil.selectCovered(aJCas, Token.class, sentence)) {
            TextClassificationTarget target = new TextClassificationTarget(aJCas,
                    token.getBegin(), token.getEnd());
            target.setId(tcId++);
            target.setSuffix(token.getCoveredText());
            target.addToIndexes();
            TextClassificationOutcome outcome = new TextClassificationOutcome(aJCas,
                    token.getBegin(), token.getEnd());
            outcome.setOutcome(getTextClassificationOutcome(aJCas, target));
            outcome.addToIndexes();
        }
    }
}
示例10: process
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入依赖的package包/类
/**
 * Creates, for every token in the CAS, a TextClassificationTarget (with running id and the
 * token text as suffix) and a TextClassificationOutcome with the fixed label "X".
 */
@Override
public void process(JCas aJCas)
    throws AnalysisEngineProcessException
{
    // snapshot the tokens up front so we never iterate a live index while adding annotations
    List<Token> snapshot = new ArrayList<Token>(JCasUtil.select(aJCas, Token.class));
    for (Token token : snapshot) {
        int begin = token.getBegin();
        int end = token.getEnd();
        TextClassificationTarget unit = new TextClassificationTarget(aJCas, begin, end);
        unit.setId(tcId++);
        unit.setSuffix(token.getCoveredText());
        unit.addToIndexes();
        TextClassificationOutcome outcome = new TextClassificationOutcome(aJCas, begin, end);
        outcome.setOutcome("X");
        outcome.addToIndexes();
    }
}
示例11: process
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入依赖的package包/类
/**
 * Feeds every token (normalized) into the dictionary TreeMatcher and creates one annotation
 * of the configured type for each matched dictionary entry.
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    TreeMatcher matcher = new TreeMatcher(this.tree);
    Type annotationUimaType = CasUtil.getType(jcas.getCas(), this.annotationType);
    Iterator<Token> tokens = JCasUtil.iterator(jcas, Token.class);
    while (tokens.hasNext()) {
        Token token = tokens.next();
        // advance the matcher by the normalized token text, then annotate any hits
        String normalized = this.textNormalizer.normalize(token.getCoveredText());
        matcher.proceed(token.getBegin(), token.getEnd(), normalized);
        for (TreeMatch match : matcher.getMatches()) {
            for (EntryMetadata metadata : match.matchedEntries()) {
                annotate(jcas, annotationUimaType, match, metadata);
            }
        }
    }
}
示例12: createDictionaryAnnotatorEngine
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入依赖的package包/类
/**
 * Assembles the aggregate analysis engine used by the benchmark: a simple tokenizer
 * followed by the dictionary annotator configured with the benchmark dictionary
 * (case-sensitive, accent-sensitive, ';' as CSV separator).
 *
 * @return the aggregate analysis engine
 * @throws Exception if the engine cannot be created
 */
private static AnalysisEngine createDictionaryAnnotatorEngine() throws Exception {
    AggregateBuilder builder = new AggregateBuilder();
    builder.add(AnalysisEngineFactory.createEngineDescription(SimpleTokenizer.class,
            UimaUtil.SENTENCE_TYPE_PARAMETER, Sentence.class.getName(),
            UimaUtil.TOKEN_TYPE_PARAMETER, Token.class.getName()));
    builder.add(AnalysisEngineFactory.createEngineDescription(DictionaryAnnotator.class,
            DictionaryAnnotator.PARAM_DICTIONARY_LOCATION, "classpath:benchmark-dictionary.csv",
            DictionaryAnnotator.PARAM_TOKENIZER_CLASS, SimpleOpenNlpTokenizer.class.getName(),
            DictionaryAnnotator.PARAM_ANNOTATION_TYPE, DictionaryEntry.class.getName(),
            DictionaryAnnotator.PARAM_CSV_SEPARATOR, ";",
            DictionaryAnnotator.PARAM_DICTIONARY_CASE_SENSITIVE, true,
            DictionaryAnnotator.PARAM_DICTIONARY_ACCENT_SENSITIVE, true));
    return AnalysisEngineFactory.createEngine(builder.createAggregateDescription());
}
示例13: process
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入依赖的package包/类
/**
 * Runs the given dictionary annotator (preceded by a simple tokenizer over the whole
 * document) on the supplied text and returns the resulting JCas.
 *
 * @param dictionaryDescription the dictionary annotator description to run
 * @param text the document text to analyze
 * @return the processed JCas
 * @throws RuntimeException if building the pipeline or processing the text fails
 */
private JCas process(AnalysisEngineDescription dictionaryDescription, String text) {
    try {
        AggregateBuilder builder = new AggregateBuilder();
        builder.add(AnalysisEngineFactory.createEngineDescription(SimpleTokenizer.class,
                UimaUtil.SENTENCE_TYPE_PARAMETER, "uima.tcas.DocumentAnnotation",
                UimaUtil.TOKEN_TYPE_PARAMETER, Token.class.getName()));
        builder.add(dictionaryDescription);
        AnalysisEngine engine = AnalysisEngineFactory.createEngine(builder.createAggregateDescription());
        JCas jcas = engine.newJCas();
        jcas.setDocumentText(text);
        engine.process(jcas);
        return jcas;
    } catch (Exception e) {
        // The previous message ("Failed to create UIMA engine") was misleading: this catch
        // also covers failures thrown while PROCESSING the document, not just while
        // constructing the engine.
        throw new RuntimeException("Failed to create or run UIMA engine", e);
    }
}
示例14: getLexicalHead
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入依赖的package包/类
/**
 * If the given clause head is an auxiliary verb attached via 'cop', returns the 'lexical'
 * head (which may be a noun predicate etc.) that carries the connections to the other
 * dependents, e.g. nsubj. A head that already has children in {@code childNodeMap} is
 * returned unchanged.
 *
 * @param clauseHead the syntactic head token of the clause
 * @param childNodeMap map from each token to the set of dependencies of its children
 * @return the lexical head, or {@code clauseHead} itself when it has children or when no
 *         covering dependency (and thus no governor) can be found
 */
public static Token getLexicalHead(Token clauseHead,
        Map<Token, Set<Dependency>> childNodeMap) {
    if (childNodeMap.containsKey(clauseHead)) {
        return clauseHead;
    }
    // clauseHead has no children: this happens for auxiliary verbs, which are attached as
    // child nodes of the 'lexical head'. E.g. in 'he is a member' the lexical head is
    // 'member' and 'he' is the subject of 'member'. Follow the first dependency covering
    // the head to its governor to find that lexical head.
    // (The previous version re-tested !childNodeMap.containsKey(clauseHead) here, which is
    // always true after the early return above; the redundant check has been removed.)
    Collection<Dependency> deps = JCasUtil.selectCovered(
            Dependency.class, clauseHead);
    if (!deps.isEmpty()) {
        return deps.iterator().next().getGovernor();
    }
    return clauseHead;
}
示例15: hasTemporalModifier
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入依赖的package包/类
/**
 * Does the clause (the given token is the clause's head) have a temporal modifier?
 *
 * @param token the clause head; auxiliaries/modals are first resolved to the lexical head
 * @param childNodeMap map from each token to the set of dependencies of its children
 * @return true if any child dependency of the (lexical) head has type "tmod"
 */
public static boolean hasTemporalModifier(Token token,
        Map<Token, Set<Dependency>> childNodeMap) {
    // a modal does not carry the modifiers itself; look them up on its lexical head
    Token head = getLexicalHead(token, childNodeMap);
    if (!childNodeMap.containsKey(head)) {
        return false;
    }
    // scan the children for a temporal-modifier dependency
    for (Dependency child : childNodeMap.get(head)) {
        if (child.getDependencyType().equals("tmod")) {
            return true;
        }
    }
    return false;
}