本文整理汇总了Python中spacy.tokens.Span类的典型用法代码示例。如果您正苦于以下问题：Python tokens.Span类的具体用法？Python tokens.Span怎么用？Python tokens.Span使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块spacy.tokens的用法示例。
在下文中一共展示了tokens.Span类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __new__
# 需要导入模块: from spacy import tokens [as 别名]
# 或者: from spacy.tokens import Span [as 别名]
def __new__(
    cls,
    span,
    mention_index,
    utterance_index,
    utterance_start_sent,
    speaker=None,
    gold_label=None,
    *args,
    **kwargs,
):
    """Allocate the instance as a spaCy Span over the same tokens as ``span``.

    Only ``span`` (its ``doc``, ``start`` and ``end``) and any extra
    ``*args`` / ``**kwargs`` are forwarded to ``Span.__new__``.  The other
    parameters are accepted but not read here.
    NOTE(review): presumably they are consumed by ``__init__`` -- confirm
    against the enclosing class.
    """
    # Construction has to be customised in __new__ rather than __init__, see
    # http://cython.readthedocs.io/en/latest/src/userguide/special_methods.html
    return spacy.tokens.Span.__new__(
        cls, span.doc, span.start, span.end, *args, **kwargs
    )
示例2: get_sentence
# 需要导入模块: from spacy import tokens [as 别名]
# 或者: from spacy.tokens import Span [as 别名]
def get_sentence(self, sid: Union[int, List[int]]=0, doc: Doc=None) -> Union[Span, List[Span]]:
    """Query one or several sentences of a document.

    Keyword Arguments:
        sid {Union[int, List[int]]} -- a sentence index, or a list of
            sentence indices. Indices outside ``[0, number of sentences)``
            are ignored. (default: {0})
        doc {Doc} -- document to take the sentences from; when falsy,
            ``self.doc`` is used. (default: {None})

    Returns:
        Union[Span, List[Span]] -- the single matching sentence, or a list
        when several indices are valid. When no index is valid the first
        sentence is returned, or ``None`` if there are no sentences at all.
    """
    source = doc if doc else self.doc
    sentences = list(source.sents)

    if isinstance(sid, (int, float)):
        index = int(sid)
        if 0 <= index < len(sentences):
            return sentences[index]
        # An out-of-range scalar used to fall through to the list handling
        # below and crash with "TypeError: 'int' object is not iterable".
        # Treat it like a list without any valid index instead.
        valid_ids = []
    else:
        # Otherwise sid is expected to be an iterable of indices.
        valid_ids = [int(s) for s in sid if 0 <= s < len(sentences)]

    if valid_ids:
        picked = [sentences[s] for s in valid_ids]
        return picked[0] if len(picked) == 1 else picked
    return sentences[0] if sentences else None
示例3: convert_doc
# 需要导入模块: from spacy import tokens [as 别名]
# 或者: from spacy.tokens import Span [as 别名]
def convert_doc(doc: Union[Doc, Span, 'Target'], strict_format: str=None):
    """Reduce ``doc`` (or every item of a list) to a Doc, Span or string.

    Strings are passed through untouched.  Doc/Span objects are optionally
    coerced according to ``strict_format`` ('doc' or 'span').  Any other
    object is replaced by its ``doc`` attribute (``None`` when it has none)
    before the same coercion is applied.  A falsy input yields ``None``.
    """

    def _apply_format(item):
        # Falsy items collapse to None; strings are never coerced.
        if not item:
            return None
        if type(item) is str:
            return item
        if strict_format == 'doc':
            return item if type(item) is Doc else item.as_doc()
        if strict_format == 'span':
            return item if type(item) is Span else item[:]
        return item

    def _unwrap(item):
        # Exact type checks on purpose: anything that is not precisely a
        # Doc or Span is unwrapped through its `doc` attribute.
        if type(item) is str:
            return item
        if type(item) in (Doc, Span):
            return _apply_format(item)
        return _apply_format(getattr(item, 'doc', None))

    if not doc:
        return None
    if type(doc) is list:
        return list(map(_unwrap, doc))
    return _unwrap(doc)
示例4: span_to_json
# 需要导入模块: from spacy import tokens [as 别名]
# 或者: from spacy.tokens import Span [as 别名]
def span_to_json(sentence: Span, sid: int = 0) -> List[Dict]:
    '''
    Serialise every token of a sentence into a JSON-friendly dict.

    @param <Span> sentence: sentence in span type
    @param <int> sid: sentence id stored on every token entry
    @return <List[Dict]> one dict per token, or None when sentence is None
    '''
    if sentence is None:
        return None
    rows = []
    for token in sentence:
        rows.append({
            'idx': token.i,
            'text': token.text,
            'ner': token.ent_type_,
            'lemma': token.lemma_,
            'pos': token.pos_,
            'tag': token.tag_,
            'whitespace': token.whitespace_,
            'sid': sid,
        })
    return rows
示例5: span_to_bigram
# 需要导入模块: from spacy import tokens [as 别名]
# 或者: from spacy.tokens import Span [as 别名]
def span_to_bigram(span: Span) -> list:
    """Pair every token with each other token within the Markov window.

    Returns a list of ``[a, b, dist]`` entries where ``dist`` is the signed
    position offset of ``b`` relative to ``a`` inside ``span``; a token is
    never paired with itself and ``abs(dist)`` never exceeds
    ``MARKOV_WINDOW_SIZE``.
    """
    return [
        [left, right, right_pos - left_pos]
        for left_pos, left in enumerate(span)
        for right_pos, right in enumerate(span)
        if right_pos != left_pos
        and abs(right_pos - left_pos) <= MARKOV_WINDOW_SIZE
    ]
示例6: __init__
# 需要导入模块: from spacy import tokens [as 别名]
# 或者: from spacy.tokens import Span [as 别名]
def __init__(
    self,
    span,
    mention_index,
    utterance_index,
    utterances_start_sent,
    speaker=None,
    gold_label=None,
):
    """
    Populate the Mention-specific attributes.

    Arguments:
        span (spaCy Span): the spaCy span from which creating the Mention object
            (not read in this method)
        mention_index (int): index of the Mention in the Document
        utterance_index (int): index of the utterance of the Mention in the Document
        utterances_start_sent (int): index of the first sentence of the utterance of the Mention in the Document
            (an utterance can comprise several sentences)
        speaker (Speaker): the speaker of the mention
        gold_label (anything): a gold label associated to the Mention (for training)
    """
    # Position of the mention.
    self.index = mention_index
    self.utterance_index = utterance_index
    self.utterances_sent = utterances_start_sent + self._get_doc_sent_number()

    # Annotations supplied by the caller.
    self.speaker = speaker
    self.gold_label = gold_label

    # Embedding / feature slots start out empty.
    self.spans_embeddings = self.words_embeddings = None
    self.features = self.features_ = None
    self.spans_embeddings_ = self.words_embeddings_ = None

    # Derived properties; kept in this order because the helpers may read
    # attributes assigned above.
    self.mention_type = self._get_type()
    self.propers = set(self.content_words)
    self.entity_label = self._get_entity_label()
    self.in_entities = self._get_in_entities()
示例7: _get_type
# 需要导入模块: from spacy import tokens [as 别名]
# 或者: from spacy.tokens import Span [as 别名]
def _get_type(self):
    """Classify the span as a LIST, PRONOMINAL, PROPER or NOMINAL mention."""
    conjunction_tags = ("CC", ",")
    pronoun_tags = ("PRP", "PRP$")
    proper_noun_tags = ("NNP", "NNPS")

    # A conjunction/comma token outside an accepted entity marks a list.
    for token in self:
        if token.tag_ in conjunction_tags and token.ent_type_ not in ACCEPTED_ENTS:
            return MENTION_TYPE["LIST"]

    # Otherwise the root token decides.
    head = self.root
    if head.tag_ in pronoun_tags:
        return MENTION_TYPE["PRONOMINAL"]
    if head.ent_type_ in ACCEPTED_ENTS or head.tag_ in proper_noun_tags:
        return MENTION_TYPE["PROPER"]
    return MENTION_TYPE["NOMINAL"]
示例8: heads_agree
# 需要导入模块: from spacy import tokens [as 别名]
# 或者: from spacy.tokens import Span [as 别名]
def heads_agree(self, mention2):
    """Tell whether this Mention's root matches the root of another Mention/Span."""
    # Same-type named entities do not have to match perfectly: one root may
    # simply be contained in the other text, e.g. "George" -> "George Bush".
    same_entity_kind = (
        self.in_entities
        and mention2.in_entities
        and self.entity_label == mention2.entity_label
    )
    if same_entity_kind and (
        self.root.lower_ in mention2.lower_
        or mention2.root.lower_ in self.lower_
    ):
        return True
    # Everything else needs identical lowercase roots.
    return self.root.lower_ == mention2.root.lower_
示例9: exact_match
# 需要导入模块: from spacy import tokens [as 别名]
# 或者: from spacy.tokens import Span [as 别名]
def exact_match(self, mention2):
    """Tell whether both Mentions/Spans have the same lowercase text."""
    own_text = self.lower_
    other_text = mention2.lower_
    return own_text == other_text
示例10: default
# 需要导入模块: from spacy import tokens [as 别名]
# 或者: from spacy.tokens import Span [as 别名]
def default(self, obj):
    """Encode Span objects as their string form; defer the rest to JSONEncoder."""
    if not isinstance(obj, Span):
        # Not a Span: the stock implementation handles (or rejects) it.
        return json.JSONEncoder.default(self, obj)
    return str(obj)
示例11: gen_text_from_sent_list
# 需要导入模块: from spacy import tokens [as 别名]
# 或者: from spacy.tokens import Span [as 别名]
def gen_text_from_sent_list(sentences: List[Span]) -> str:
    '''
    Join sentences back into one string.

    Each sentence contributes its text followed by the whitespace recorded
    on its last token.
    '''
    pieces = []
    for sent in sentences:
        pieces.append(sent.text + sent[-1].whitespace_)
    return ''.join(pieces)
示例12: spans_to_json
# 需要导入模块: from spacy import tokens [as 别名]
# 或者: from spacy.tokens import Span [as 别名]
def spans_to_json(sentences: List[Span]) -> List[Dict]:
    '''
    Serialise several sentences into one flat list of token dicts.

    @param <Span[]> sentences: sentences in span type; the position of a
        sentence in this list becomes the 'sid' of its tokens
    @return <List[Dict]> json-serialized tokens of all sentences, in order
    '''
    spans = []
    for sid, sentence in enumerate(sentences):
        tokens = span_to_json(sentence, sid=sid)
        # span_to_json returns None for a None sentence; extending the list
        # with None would raise TypeError, so such entries are skipped.
        if tokens:
            spans.extend(tokens)
    return spans
示例13: length
# 需要导入模块: from spacy import tokens [as 别名]
# 或者: from spacy.tokens import Span [as 别名]
def length(
    docs: Union['Target', Span, List[Union['Target', Span]]]) -> int:
    """
    The length of a given span, in tokens.
    If the input is a non-empty list, the minimum length over all of its
    items is returned. Falsy items (and an empty list) count as length 0.

    Parameters
    ----------
    docs : Union[Target, Span, List[Union[Target, Span]]]
        The input doc(s) for computing the length.

    Returns
    -------
    int
        The length.

    Raises
    ------
    DSLValueError
        If ``docs`` is None.
    Exception
        Any other failure, wrapped with a message naming this function.
    """

    def _token_count(doc):
        # convert_doc reduces the input to a Doc/Span/str before measuring.
        return len(convert_doc(doc)) if doc else 0

    try:
        if docs is None:
            raise DSLValueError(f"No valid input to [ length ]. input: {docs}")
        if type(docs) is list and docs:
            return min(_token_count(doc) for doc in docs)
        return _token_count(docs)
    except DSLValueError:
        # Domain errors are passed on untouched.
        raise
    except Exception as e:
        raise Exception(f"Unknown exception from [ length ]: {e}")
示例14: STRING
# 需要导入模块: from spacy import tokens [as 别名]
# 或者: from spacy.tokens import Span [as 别名]
def STRING(target: Union['Target', Span]) -> str:
    """Get the raw string from a given span or target.

    When the class name of the first converted target contains "label",
    its ``get_label()`` value is returned; otherwise the text of the first
    converted doc, or an empty string when that doc is falsy.

    Parameters
    ----------
    target : Union[Target, Span]
        The target to be converted to string.

    Returns
    -------
    str
        The string.

    Raises
    ------
    DSLValueError
        If ``target`` is falsy.
    Exception
        Any other failure, wrapped with a message naming this function.
    """
    try:
        if not target:
            raise DSLValueError(f"No valid input to [ STRING ]. target: {target}")
        targets = convert_list(target)
        first_doc = convert_doc(targets)[0]
        first_target = targets[0]
        if 'label' in first_target.__class__.__name__.lower():
            return first_target.get_label()
        if first_doc:
            return first_doc.text
        return ""
    except DSLValueError:
        # Domain errors are passed on untouched.
        raise
    except Exception as e:
        raise Exception(f"Unknown exception from [ STRING ]: {e}")
示例15: _compute_span_info
# 需要导入模块: from spacy import tokens [as 别名]
# 或者: from spacy.tokens import Span [as 别名]
def _compute_span_info(self,
instance: Instance, spans: Span, feature_list: List[str], target: str, info_idxes):
# Record, inside the nested mapping `info_idxes`, which instances contain the
# feature pattern formed by the tokens of `spans`, and for which models those
# instances are incorrect. `info_idxes` is updated in place and returned.
#
# NOTE(review): the indentation of this block was lost in this copy, so the
# nesting of the statements below cannot be determined from the text alone.
# In particular it is unclear which `if` the `else:` inside the prediction
# loop belongs to, and whether the statements after it are inside that
# `else`. Confirm against the upstream source before re-indenting.
#
# A target that is not an entry of the instance is treated as a model name:
# it is looked up under 'predictions' and remembered via `target_name`.
if target not in instance.entries:
target_name = f'prediction(model="{target}")'
target = 'predictions'
else:
target_name = target
# Give up (returning info_idxes unchanged) when more than two distinct
# feature names are requested.
if len(list(np.unique(feature_list))) > 2:
return info_idxes
# One stripped feature string per token; token idx uses feature_list[idx].
span_features = [ get_token_feature(t, feature_list[idx]).strip() for idx, t in enumerate(spans) ]
# Skip patterns containing an empty feature or a bare "(", ")" or ",".
if any([not s or s in ["(", ")", ","] for s in span_features]):
return info_idxes
# Skip patterns where a 'tag' feature value is not in VBs + WHs + NNs.
if any([ f not in VBs +WHs + NNs and feature_list[idx] == 'tag' for idx, f in enumerate(span_features) ]):
return info_idxes
# The pattern key is the space-joined feature sequence.
pattern = ' '.join(span_features)
# Create the per-pattern containers (including the 'cover' sub-mapping).
if pattern not in info_idxes[target]:
info_idxes[target][pattern] = defaultdict(dict)
info_idxes[target][pattern]['cover'] = defaultdict(dict)
# For non-prediction targets, mark this instance under cover['total'].
if target != 'predictions':
info_idxes[target][pattern]['cover']['total'][instance.key()] = True
# Go through the instance's predictions (none when the entry is falsy).
predictions = instance.get_entry('predictions') or []
for prediction in predictions:
model = prediction.model
# For the 'predictions' target, skip models whose name does not occur in
# `target_name` (a substring test on the string built above).
if target == 'predictions':
if model not in target_name:
continue
else:
# Mark this instance as covered for the model, and as an error for the
# model when instance.is_incorrect(model) is true.
info_idxes[target][pattern]['cover'][model][instance.key()] = True
if instance.is_incorrect(model):
info_idxes[target][pattern][model][instance.key()] = True
#print(model, instance.is_incorrect(model))
#print(info_idxes[target][pattern][model])
return info_idxes