

Python tokens.Span Code Examples

This article collects typical usage examples of the spacy.tokens.Span class in Python. If you are wondering how exactly to use tokens.Span, how it works, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples from the containing module, spacy.tokens.


The following presents 15 code examples of tokens.Span, sorted by popularity by default.
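Before the examples, here is a minimal sketch of the basic Span API for orientation. It assumes the en_core_web_sm model is installed; the sentence and indices are arbitrary.

import spacy
from spacy.tokens import Span

nlp = spacy.load("en_core_web_sm")   # assumes this model is installed
doc = nlp("Apple is looking at buying a U.K. startup.")

# A Span is a slice of a Doc; token indices are end-exclusive.
span = doc[0:2]                      # "Apple is"
print(span.text, span.start, span.end)

# Spans can also be constructed directly, optionally with a label.
labeled = Span(doc, 0, 1, label="ORG")
print(labeled.text, labeled.label_)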

Example 1: __new__

# Module import needed: from spacy import tokens [as alias]
# Or: from spacy.tokens import Span [as alias]
def __new__(
        cls,
        span,
        mention_index,
        utterance_index,
        utterance_start_sent,
        speaker=None,
        gold_label=None,
        *args,
        **kwargs,
    ):
        # We need to override __new__ see http://cython.readthedocs.io/en/latest/src/userguide/special_methods.html
        obj = spacy.tokens.Span.__new__(
            cls, span.doc, span.start, span.end, *args, **kwargs
        )
        return obj 
Developer: huggingface, Project: neuralcoref, Lines of code: 18, Source file: document.py
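As a rough, self-contained illustration of the pattern above (Span is a Cython extension type, so a subclass must forward doc, start, and end to Span.__new__), the following sketch uses a hypothetical TaggedSpan class rather than neuralcoref's actual Mention:

import spacy
from spacy.tokens import Span

class TaggedSpan(Span):
    """Hypothetical Span subclass; mirrors the __new__ pattern used by Mention."""
    def __new__(cls, span, note, *args, **kwargs):
        return Span.__new__(cls, span.doc, span.start, span.end, *args, **kwargs)

    def __init__(self, span, note):
        self.note = note  # extra attribute carried alongside the Span data

nlp = spacy.blank("en")              # a blank pipeline is enough for this demo
doc = nlp("My sister has a dog")
tagged = TaggedSpan(doc[0:2], note="subject")
print(tagged.text, tagged.note)      # "My sister" subject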

Example 2: get_sentence

# Module import needed: from spacy import tokens [as alias]
# Or: from spacy.tokens import Span [as alias]
def get_sentence(self, sid: Union[int, List[int]]=0, doc: Doc=None) -> Union[Span, List[Span]]:
        """Query a sentence in a paragraph.
        
        Keyword Arguments:
            sid {Union[int, List[int]]} -- sid the sentence id; or. (default: {None})
        
        Returns:
            Union[Span, List[Span]] -- the sentence
        """
        if doc:
            sentences = list(doc.sents)
        else:
            sentences = list(self.doc.sents)
        if type(sid) == int or type(sid) == float:
            if int(sid) >= 0 and int(sid) < len(sentences):
                return sentences[int(sid)]
            sid = [sid]  # out-of-range scalar id: fall through to the list handling below
        # else it's a list of sentence ids
        sid = [int(s) for s in sid if s >= 0 and s < len(sentences)]
        if len(sid) > 0:
            filtered = [sentences[s] for s in sid]
            return filtered[0] if len(filtered) == 1 else filtered
        if sentences:
            return sentences[0]
        return None 
Developer: uwdata, Project: errudite, Lines of code: 26, Source file: context.py
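The method above belongs to a class in errudite (it reads self.doc); as a rough standalone illustration of the same selection logic over doc.sents, not the errudite class itself:

import spacy

nlp = spacy.load("en_core_web_sm")   # needs a pipeline with sentence boundaries
doc = nlp("First sentence. Second sentence. Third sentence.")
sentences = list(doc.sents)          # each sentence is a Span

print(sentences[0].text)                                  # single id -> one Span
picked = [sentences[i] for i in [0, 2] if 0 <= i < len(sentences)]
print([s.text for s in picked])                           # list of ids -> list of Spans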

Example 3: convert_doc

# Module import needed: from spacy import tokens [as alias]
# Or: from spacy.tokens import Span [as alias]
def convert_doc(doc: Union[Doc, Span, 'Target'], strict_format: str=None):
    def _strict_doc(doc):
        if not doc:
            return None
        if type(doc) == str:
            return doc
        if strict_format == 'doc':
            return doc if type(doc) == Doc else doc.as_doc()
        if strict_format == 'span':
            return doc if type(doc) == Span else doc[:]
        return doc
    def _convert(doc):
        if type(doc) == str:
            return doc
        if type(doc) == Doc or type(doc) == Span:
            return _strict_doc(doc)
        else:
            return _strict_doc(getattr(doc, 'doc', None))
    if not doc:
        return None
    if type(doc) == list:
        return [ _convert(d) for d in doc ]
    else:
        return _convert(doc) 
Developer: uwdata, Project: errudite, Lines of code: 26, Source file: helpers.py
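A hedged usage sketch of the conversion rules above, assuming convert_doc as defined in this example is in scope (errudite Target objects are not constructed here):

import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("A short example sentence.")
span = doc[0:3]

print(type(convert_doc(doc)))                          # Doc stays a Doc
print(type(convert_doc(span, strict_format='doc')))    # Span -> standalone Doc via as_doc()
print(type(convert_doc(doc, strict_format='span')))    # Doc -> Span covering the whole Doc
print([type(d) for d in convert_doc([doc, span])])     # lists convert element-wise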

Example 4: span_to_json

# Module import needed: from spacy import tokens [as alias]
# Or: from spacy.tokens import Span [as alias]
def span_to_json(sentence: Span, sid: int = 0) -> List[Dict]:
    '''
    @param  <Span> sentence: sentence in Span type
    @return <List[Dict]> json-serialized sentence, one dict per token
    '''
    if sentence is None:
        return None
    j_sentence = [{
        'idx': t.i,
        'text': t.text,
        'ner': t.ent_type_,
        'lemma': t.lemma_,
        'pos': t.pos_,
        'tag': t.tag_,
        'whitespace': t.whitespace_,
        'sid': sid #,
        #'matches': []
        } for t in sentence]
    return j_sentence 
Developer: uwdata, Project: errudite, Lines of code: 21, Source file: helpers.py
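A hedged usage sketch, assuming span_to_json from the example above is in scope and a model is installed:

import json
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Berlin is the capital of Germany.")
first_sentence = list(doc.sents)[0]

tokens_json = span_to_json(first_sentence, sid=0)
print(json.dumps(tokens_json[:2], indent=2))   # first two token records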

Example 5: span_to_bigram

# Module import needed: from spacy import tokens [as alias]
# Or: from spacy.tokens import Span [as alias]
def span_to_bigram(span: Span) -> list:

        grams = []

        for a_idx, a in enumerate(span):
            for b_idx, b in enumerate(span):

                dist = b_idx - a_idx
                if dist == 0:
                    continue

                elif abs(dist) <= MARKOV_WINDOW_SIZE:
                    grams.append([a, b, dist])

        return grams 
Developer: csvance, Project: armchair-expert, Lines of code: 17, Source file: markov_engine.py
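MARKOV_WINDOW_SIZE is a module-level constant in armchair-expert's markov_engine.py; the value below is an assumption for illustration. Note that the function yields windowed skip-grams (token pairs plus their signed distance) rather than only adjacent bigrams. A rough sketch, treating the function as a standalone helper defined alongside the constant:

import spacy

MARKOV_WINDOW_SIZE = 2               # assumed value, for illustration only

nlp = spacy.blank("en")
doc = nlp("the quick brown fox")
grams = span_to_bigram(doc[:])       # pass the whole Doc as a Span
for a, b, dist in grams[:4]:
    print(a.text, b.text, dist)      # e.g. "the quick 1", "the brown 2", ...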

Example 6: __init__

# Module import needed: from spacy import tokens [as alias]
# Or: from spacy.tokens import Span [as alias]
def __init__(
        self,
        span,
        mention_index,
        utterance_index,
        utterances_start_sent,
        speaker=None,
        gold_label=None,
    ):
        """
        Arguments:
            span (spaCy Span): the spaCy span from which the Mention object is created
            mention_index (int): index of the Mention in the Document
            utterance_index (int): index of the utterance of the Mention in the Document
            utterances_start_sent (int): index of the first sentence of the utterance of the Mention in the Document
                (an utterance can comprise several sentences)
            speaker (Speaker): the speaker of the mention
            gold_label (anything): a gold label associated with the Mention (for training)
        """
        self.index = mention_index
        self.utterance_index = utterance_index
        self.utterances_sent = utterances_start_sent + self._get_doc_sent_number()
        self.speaker = speaker
        self.gold_label = gold_label
        self.spans_embeddings = None
        self.words_embeddings = None
        self.features = None

        self.features_ = None
        self.spans_embeddings_ = None
        self.words_embeddings_ = None

        self.mention_type = self._get_type()
        self.propers = set(self.content_words)
        self.entity_label = self._get_entity_label()
        self.in_entities = self._get_in_entities() 
Developer: huggingface, Project: neuralcoref, Lines of code: 38, Source file: document.py

Example 7: _get_type

# Module import needed: from spacy import tokens [as alias]
# Or: from spacy.tokens import Span [as alias]
def _get_type(self):
        """ Find the type of the Span """
        conj = ["CC", ","]
        prp = ["PRP", "PRP$"]
        proper = ["NNP", "NNPS"]
        if any(t.tag_ in conj and t.ent_type_ not in ACCEPTED_ENTS for t in self):
            mention_type = MENTION_TYPE["LIST"]
        elif self.root.tag_ in prp:
            mention_type = MENTION_TYPE["PRONOMINAL"]
        elif self.root.ent_type_ in ACCEPTED_ENTS or self.root.tag_ in proper:
            mention_type = MENTION_TYPE["PROPER"]
        else:
            mention_type = MENTION_TYPE["NOMINAL"]
        return mention_type 
Developer: huggingface, Project: neuralcoref, Lines of code: 16, Source file: document.py
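The method above relies on two module-level constants from neuralcoref's document.py, MENTION_TYPE and ACCEPTED_ENTS. The shapes below are plausible reconstructions for readability, not verbatim copies of the project's values:

# Plausible shapes of the constants used by _get_type (illustrative only).
MENTION_TYPE = {"PRONOMINAL": 0, "NOMINAL": 1, "PROPER": 2, "LIST": 3}
ACCEPTED_ENTS = ["PERSON", "NORP", "FAC", "ORG", "GPE", "LOC",
                 "PRODUCT", "EVENT", "WORK_OF_ART", "LANGUAGE"]

# Intuition: a coordinated span ("John and Mary") -> LIST, a pronoun root
# ("she") -> PRONOMINAL, a named-entity or NNP root ("Barack Obama") -> PROPER,
# anything else ("the dog") -> NOMINAL.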

Example 8: heads_agree

# Module import needed: from spacy import tokens [as alias]
# Or: from spacy.tokens import Span [as alias]
def heads_agree(self, mention2):
        """ Does the root of the Mention match the root of another Mention/Span"""
        # we allow same-type NEs to not match perfectly,
        # but rather one could be included in the other, e.g., "George" -> "George Bush"
        if (
            self.in_entities
            and mention2.in_entities
            and self.entity_label == mention2.entity_label
            and (
                self.root.lower_ in mention2.lower_
                or mention2.root.lower_ in self.lower_
            )
        ):
            return True
        return self.root.lower_ == mention2.root.lower_ 
Developer: huggingface, Project: neuralcoref, Lines of code: 17, Source file: document.py

Example 9: exact_match

# Module import needed: from spacy import tokens [as alias]
# Or: from spacy.tokens import Span [as alias]
def exact_match(self, mention2):
        """ Does the Mention lowercase text matches another Mention/Span lowercase text"""
        return self.lower_ == mention2.lower_ 
Developer: huggingface, Project: neuralcoref, Lines of code: 5, Source file: document.py

Example 10: default

# Module import needed: from spacy import tokens [as alias]
# Or: from spacy.tokens import Span [as alias]
def default(self, obj):
        if isinstance(obj, Span): return str(obj)
        return json.JSONEncoder.default(self, obj) 
Developer: MichaMucha, Project: pydata2019-nlp-system, Lines of code: 5, Source file: mq.py
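A hedged sketch of how such an encoder is typically wired into json.dumps (the wrapping class name SpanEncoder is illustrative; the original method lives in a class in mq.py):

import json
import spacy
from spacy.tokens import Span

class SpanEncoder(json.JSONEncoder):
    """Stringify Span objects so they survive JSON serialization."""
    def default(self, obj):
        if isinstance(obj, Span):
            return str(obj)
        return json.JSONEncoder.default(self, obj)

nlp = spacy.blank("en")
doc = nlp("Paris is in France")
payload = {"entity": doc[0:1], "label": "GPE"}
print(json.dumps(payload, cls=SpanEncoder))   # {"entity": "Paris", "label": "GPE"}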

Example 11: gen_text_from_sent_list

# Module import needed: from spacy import tokens [as alias]
# Or: from spacy.tokens import Span [as alias]
def gen_text_from_sent_list(sentences: List[Span]) -> str:
    '''
    Join a list of sentence Spans back into a single string, preserving each
    sentence's trailing whitespace.
    '''
    return ''.join([s.text + s[-1].whitespace_ for s in sentences]) 
Developer: uwdata, Project: errudite, Lines of code: 7, Source file: helpers.py
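A hedged round-trip sketch, assuming gen_text_from_sent_list from the example above is in scope:

import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("First sentence. Second one here. Third.")
sentences = list(doc.sents)
print(gen_text_from_sent_list(sentences))   # reconstructs the original text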

Example 12: spans_to_json

# Module import needed: from spacy import tokens [as alias]
# Or: from spacy.tokens import Span [as alias]
def spans_to_json(sentences: List[Span]) -> List[Dict]:
    '''
    @param  <Span[]> sentences: sentences in Span type
    @return <List[Dict]> json-serialized tokens for all sentences
    '''
    spans = []
    for sid, sentence in enumerate(sentences):
        spans += span_to_json(sentence, sid=sid)
    return spans 
Developer: uwdata, Project: errudite, Lines of code: 11, Source file: helpers.py

Example 13: length

# Module import needed: from spacy import tokens [as alias]
# Or: from spacy.tokens import Span [as alias]
def length(
    docs: Union['Target', Span, List[Union['Target', Span]]]) -> int:
    """
    The length of a given span, in tokens.
    If the input is a List, take the min length of all spans in the list.
    
    Parameters
    ----------
    docs : Union[Target, Span, List[Union[Target, Span]]]
        The input doc(s) for computing the length.
    
    Returns
    -------
    int
        The length.
    """
    output = 0
    try:
        def length_(doc):
            return len(convert_doc(doc)) if doc else 0
        if docs is None:
            raise DSLValueError(f"No valid input to [ length ]. input: {docs}")
        elif type(docs) == list and len(docs) > 0:
            output = min([ length_(doc) for doc in docs ])
        else:
            output = length_(docs)
    except DSLValueError as e:
        raise(e)
    except Exception as e:
        ex = Exception(f"Unknown exception from [ length ]: {e}")
        raise(ex)
    else:
        return output
Developer: uwdata, Project: errudite, Lines of code: 41, Source file: length.py
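A hedged usage sketch, assuming length (and the convert_doc / DSLValueError helpers it depends on) from the example above are in scope:

import spacy

nlp = spacy.blank("en")
doc = nlp("A fairly short example sentence.")

print(length(doc))                 # token count of the whole Doc -> 6
print(length(doc[0:3]))            # token count of a Span -> 3
print(length([doc[0:3], doc]))     # list input -> the minimum length -> 3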

Example 14: STRING

# Module import needed: from spacy import tokens [as alias]
# Or: from spacy.tokens import Span [as alias]
def STRING(target: Union['Target', Span]) -> str:
    """Get the raw string from a given span or target.
    
    Parameters
    ----------
    target : Union[Target, Span]
        The target to be converted to string.
    
    Returns
    -------
    str
        The string.
    """
    output = ""
    try:
        if not target:
            raise DSLValueError(f"No valid input to [ STRING ]. target: {target}")
        else:
            target = convert_list(target)
            doc = convert_doc(target)[0]
            if 'label' in target[0].__class__.__name__.lower():
                output = target[0].get_label()
            elif doc:
                output = doc.text
    except DSLValueError as e:
        raise(e)
    except Exception as e:
        ex = Exception(f"Unknown exception from [ STRING ]: {e}")
        raise(ex)
    else:
        return output
Developer: uwdata, Project: errudite, Lines of code: 37, Source file: linguistic.py

Example 15: _compute_span_info

# Module import needed: from spacy import tokens [as alias]
# Or: from spacy.tokens import Span [as alias]
def _compute_span_info(self, 
        instance: Instance, spans: Span, feature_list: List[str], target: str, info_idxes):
        if target not in instance.entries:
            target_name = f'prediction(model="{target}")'
            target = 'predictions'
        else:
            target_name = target
        if len(list(np.unique(feature_list))) > 2:
            return info_idxes
        span_features = [ get_token_feature(t, feature_list[idx]).strip() for idx, t in enumerate(spans) ]
        if any([not s or s in ["(", ")", ","] for s in span_features]):
            return info_idxes
        if any([ f not in VBs + WHs + NNs and feature_list[idx] == 'tag' for idx, f in enumerate(span_features) ]):
            return info_idxes
        pattern = ' '.join(span_features)
        if pattern not in info_idxes[target]:
            info_idxes[target][pattern] = defaultdict(dict)
            info_idxes[target][pattern]['cover'] = defaultdict(dict)
        if target != 'predictions':
            info_idxes[target][pattern]['cover']['total'][instance.key()] = True
        predictions = instance.get_entry('predictions') or []    
        for prediction in predictions:
            model = prediction.model
            if target == 'predictions': 
                if model not in target_name:
                    continue
                else:
                    info_idxes[target][pattern]['cover'][model][instance.key()] = True      
            if instance.is_incorrect(model):
                info_idxes[target][pattern][model][instance.key()] = True
        return info_idxes
Developer: uwdata, Project: errudite, Lines of code: 35, Source file: dataset_reader.py


Note: the spacy.tokens.Span examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by their original authors, and the source code copyright remains with those authors. Please consult the corresponding project's license before distributing or reusing the code; do not republish without permission.