This article collects typical code examples showing how the Python class spacy.tokens.Token is used. If you are wondering what tokens.Token looks like in real code, the curated examples here may help; you can also explore further usages of the spacy.tokens module.
The following presents 15 code examples of tokens.Token, ordered by popularity by default.
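Before the examples, here is a minimal sketch of where spacy.tokens.Token objects usually come from: they are rarely constructed directly; a loaded pipeline produces a Doc, and iterating the Doc yields Token objects. The model name en_core_web_sm is an assumption and must be installed separately.

import spacy
from spacy.tokens import Token

nlp = spacy.load("en_core_web_sm")   # assumption: this model has been downloaded
doc = nlp("spaCy annotates every token with POS, lemma and dependency labels.")
for token in doc:
    assert isinstance(token, Token)  # each element of a Doc is a Token
    print(token.text, token.pos_, token.dep_, token.lemma_)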
Example 1: from_token
# Required import: from spacy import tokens [as alias]
# or: from spacy.tokens import Token [as alias]
def from_token(token: Token, people: list = None) -> Optional['Pos']:
    if token.text[0] == '#':
        return Pos.HASHTAG
    elif token.text[0] == '@':
        return Pos.PROPN
    elif token.text[0] == ' ' or token.text[0] == "\n":
        return Pos.SPACE

    if token._.is_emoji:
        return Pos.EMOJI

    # Make up for shortcomings of NLP detecting online nicknames
    if people is not None:
        if token.text in people:
            return Pos.PROPN

    if re.match(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', token.text):
        return Pos.URL

    try:
        return Pos[token.pos_]
    except KeyError:
        print("Unknown PoS: %s" % token.text)
        return Pos.X
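A hedged usage sketch for the method above: Pos is the project's own enum (with members such as HASHTAG, PROPN, SPACE, EMOJI, URL and X) and token._.is_emoji is a custom extension attribute, so only the call shape below is meant literally.

import spacy
from spacy.tokens import Token

Token.set_extension("is_emoji", default=False, force=True)  # assumption: stand-in for the project's emoji flag
nlp = spacy.load("en_core_web_sm")
for tok in nlp("Ping @alice about https://example.com #news"):
    print(tok.text, Pos.from_token(tok, people=["alice"]))   # Pos comes from the surrounding project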
Example 2: triple_search
# Required import: from spacy import tokens [as alias]
# or: from spacy.tokens import Token [as alias]
def triple_search(triple: Triple, token: Token):
    """
    Recursive search through the dependency tree:
    looks for triple values in each of the children and calls itself with the child nodes.
    """
    question_word = None
    for word in token.children:
        if word.text.lower() in QuestionWord.question_words:
            question_word = QuestionWord(word)
            word = QuestionWord(word)
            if not triple.get_object():
                triple.set_object(question_word)
        elif word.dep_ in OBJECT_SET:
            triple.set_object(word)

        if word.dep_ in SUBJECT_SET:
            triple.set_subject(word)

        if isinstance(word, Token) and word.dep_ not in RECURSION_BLACKLIST:
            triple = triple_search(triple, word)

    if not triple.get_subject() and question_word:
        triple.set_subject(question_word)

    return triple
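A hedged sketch of how this recursion is typically started: Triple, QuestionWord and the dependency-label sets (OBJECT_SET, SUBJECT_SET, RECURSION_BLACKLIST) belong to the surrounding project, so this only illustrates passing the sentence root as the entry point.

import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Who wrote the book?")
root = next(tok for tok in doc if tok.dep_ == "ROOT")   # start the search at the main verb
triple = triple_search(Triple(), root)                  # Triple() is the project's own container
print(triple.get_subject(), triple.get_object())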
Example 3: get_token_feature
# Required import: from spacy import tokens [as alias]
# or: from spacy.tokens import Token [as alias]
def get_token_feature(t: Token, label: str) -> str:
    """Get the linguistic feature of a spaCy Token given a feature label.

    Arguments:
        t {Token} -- input token
        label {str} -- linguistic feature to return

    Returns:
        str -- the requested linguistic feature
    """
    if label in ['text', 'orth']:
        return t.text
    if label.lower() == 'ent':
        label = 'ent_type'
    return getattr(t, '{}_'.format(label.lower()), '')
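Usage sketch for get_token_feature, assuming the function above is in scope and an English pipeline is installed; the outputs in the comments are illustrative, not guaranteed.

import spacy

nlp = spacy.load("en_core_web_sm")
token = nlp("Apple is looking at buying a startup.")[0]
print(get_token_feature(token, "text"))   # "Apple"
print(get_token_feature(token, "pos"))    # e.g. "PROPN"
print(get_token_feature(token, "ent"))    # e.g. "ORG" (mapped to token.ent_type_)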
Example 4: remove_stopwords
# Required import: from spacy import tokens [as alias]
# or: from spacy.tokens import Token [as alias]
def remove_stopwords(self, sentence_str: str = None, tokens: List[Token] = None, use_lemma: bool = True) -> str:
    """Return a normalized string of the sentence with stop words removed.

    Keyword Arguments:
        sentence_str {str} -- input sentence string (default: {None})
        tokens {List[Token]} -- pre-computed token list, with features added (default: {None})
        use_lemma {bool} -- return the lemma rather than the raw text (default: {True})

    Returns:
        str -- the string with stop words removed
    """
    if not tokens and sentence_str:
        # sentence_str = normalize_answer(sentence_str)
        tokens = self.model(sentence_str)
    elif not tokens:
        tokens = []
        # word_tokenize(sentence_str)
    attr = 'lemma_' if use_lemma else 'text'  # what to merge
    return ' '.join([
        getattr(token, attr) for token in tokens
        if not token.is_punct and token.text not in STOP_WORDS and token.lemma_ not in STOP_WORDS])
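remove_stopwords is an instance method, so calling it needs an object exposing .model (a loaded spaCy pipeline) and a module-level STOP_WORDS set; the stand-in owner and the stop-word source below are assumptions for illustration only.

import spacy
from types import SimpleNamespace
from spacy.lang.en.stop_words import STOP_WORDS  # assumed source of the stop-word set used above

owner = SimpleNamespace(model=spacy.load("en_core_web_sm"))  # hypothetical stand-in for the real owner class
print(remove_stopwords(owner, sentence_str="The cats are sitting on the mat"))
# e.g. "cat sit mat"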
Example 5: change_matched_token_form
# Required import: from spacy import tokens [as alias]
# or: from spacy.tokens import Token [as alias]
def change_matched_token_form(a_token: Token,
                              a_pattern: Dict[str, str],
                              b_pattern: Dict[str, str]) -> str:
    # first, deal with orth.
    if get_str_from_pattern(b_pattern):
        return get_str_from_pattern(b_pattern)
    elif 'TAG' in b_pattern and 'TAG' in a_pattern:  # deal with the tags
        # singular -> plural
        if a_pattern['TAG'] in ['NN', 'NNP'] and b_pattern['TAG'] in ['NNS', 'NNPS']:
            return pluralize(a_token.text)
        # plural -> singular
        elif b_pattern['TAG'] in ['NN', 'NNP'] and a_pattern['TAG'] in ['NNS', 'NNPS']:
            return singularize(a_token.text)
        # verb form change
        elif a_pattern['TAG'] in VBs and b_pattern['TAG'] in VBs:
            return conjugate(a_token.text, tag=b_pattern['TAG'])
    elif 'POS' in b_pattern and 'POS' in a_pattern:
        # if IS_DEBUGGING == 'change_matched_token_form':
        #     print('unmatched token form change', a_token, b_token, a_pattern, b_pattern)
        return a_token.text
    return a_token.text
Example 6: from_token
# Required import: from spacy import tokens [as alias]
# or: from spacy.tokens import Token [as alias]
def from_token(token: Token) -> 'MarkovNeighbor':
    key = token.text.lower()
    text = token.text
    if CapitalizationMode.from_token(token, CAPITALIZATION_COMPOUND_RULES) == CapitalizationMode.COMPOUND:
        compound = True
    else:
        compound = False
    pos = Pos.from_token(token)
    values = [0, 0]
    dist = [0] * (MARKOV_WINDOW_SIZE * 2 + 1)
    return MarkovNeighbor(key, text, pos, compound, values, dist)
Example 7: analyze
# Required import: from spacy import tokens [as alias]
# or: from spacy.tokens import Token [as alias]
def analyze(token: Token, mode: CapitalizationMode):
    pos = Pos.from_token(token)
    mode = PoSCapitalizationMode(pos, mode)
    return mode.to_embedding()
Example 8: tokens_to_indices
# Required import: from spacy import tokens [as alias]
# or: from spacy.tokens import Token [as alias]
def tokens_to_indices(
    self, tokens: List[SpacyToken], vocabulary: Vocabulary
) -> Dict[str, List[numpy.ndarray]]:
    if not all(isinstance(x, SpacyToken) for x in tokens):
        raise ValueError(
            "The spacy indexer requires you to use a Tokenizer which produces SpacyTokens."
        )
    indices: List[numpy.ndarray] = [token.vector for token in tokens]
    return {"tokens": indices}
Example 9: __init__
# Required import: from spacy import tokens [as alias]
# or: from spacy.tokens import Token [as alias]
def __init__(self, tokens: List[Token], token_indexers: Dict[str, TokenIndexer]) -> None:
    self.tokens = tokens
    self._token_indexers = token_indexers
    self._indexed_tokens: Optional[Dict[str, IndexedTokenList]] = None

    if not all(isinstance(x, (Token, SpacyToken)) for x in tokens):
        raise ConfigurationError(
            "TextFields must be passed Tokens. "
            "Found: {} with types {}.".format(tokens, [type(x) for x in tokens])
        )
Example 10: __init__
# Required import: from spacy import tokens [as alias]
# or: from spacy.tokens import Token [as alias]
def __init__(self, tokens, token_indexers):
    self.tokens = tokens
    self._token_indexers = token_indexers
    self._indexed_tokens = None
    self._indexer_name_to_indexed_token = None
    if not all([isinstance(x, (Token, SpacyToken)) for x in tokens]):
        raise ConfigurationError(u"TextFields must be passed Tokens. "
                                 u"Found: {} with types {}.".format(tokens, [type(x) for x in tokens]))

#overrides
Example 11: __init__
# Required import: from spacy import tokens [as alias]
# or: from spacy.tokens import Token [as alias]
def __init__(self, tokens: List[Token], token_indexers: Dict[str, TokenIndexer]) -> None:
    self.tokens = tokens
    self._token_indexers = token_indexers
    self._indexed_tokens: Optional[Dict[str, TokenList]] = None
    self._indexer_name_to_indexed_token: Optional[Dict[str, List[str]]] = None
    if not all([isinstance(x, (Token, SpacyToken)) for x in tokens]):
        raise ConfigurationError("TextFields must be passed Tokens. "
                                 "Found: {} with types {}.".format(tokens, [type(x) for x in tokens]))
Example 12: __init__
# Required import: from spacy import tokens [as alias]
# or: from spacy.tokens import Token [as alias]
def __init__(self, token: Token):
    self.text = self.question_words[token.text.lower()]
    self.lemma_ = self.question_words[token.text.lower()]
    self.pos_ = self.question_pos
    self.dep_ = token.dep_
    self.is_space = False
    self.children = list()
Example 13: __init__
# Required import: from spacy import tokens [as alias]
# or: from spacy.tokens import Token [as alias]
def __init__(self, subject: Token = None, predicate: Token = None, object: Token = None):
    self.set_subject(subject)
    self.set_predicate(predicate)
    self.set_object(object)
Example 14: set_subject
# Required import: from spacy import tokens [as alias]
# or: from spacy.tokens import Token [as alias]
def set_subject(self, subject: Union[Token, QuestionWord]):
    self._subject = subject
Example 15: set_predicate
# Required import: from spacy import tokens [as alias]
# or: from spacy.tokens import Token [as alias]
def set_predicate(self, predicate: Token):
    self._predicate = predicate