This article collects typical usage examples of the Python method allennlp.data.tokenizers.Tokenizer.from_params. If you are wondering what Tokenizer.from_params does, how to call it, or where it is used in practice, the hand-picked code examples below should help; you may also want to read up on the containing class, allennlp.data.tokenizers.Tokenizer.
A total of 12 code examples of Tokenizer.from_params are shown below, ordered by popularity by default.
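Before the examples, a quick orientation: Tokenizer is a Registrable class, so Tokenizer.from_params reads the "type" key of a Params block to decide which registered tokenizer to build. A minimal sketch, assuming a pre-1.0 AllenNLP where "word" and "character" are the registered names and "word" is the default implementation:

from allennlp.common import Params
from allennlp.data.tokenizers import Tokenizer

# Build tokenizers from configuration blocks; with an empty block the
# default implementation ("word") would be used.
word_tokenizer = Tokenizer.from_params(Params({"type": "word"}))
char_tokenizer = Tokenizer.from_params(Params({"type": "character"}))
print(word_tokenizer.tokenize("AllenNLP tokenizers are configured from Params."))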
Example 1: from_params
# Required module import: from allennlp.data.tokenizers import Tokenizer [as alias]
# Or: from allennlp.data.tokenizers.Tokenizer import from_params [as alias]
@classmethod
def from_params(cls, params: Params) -> 'FEVERSentenceReader':
    claim_tokenizer = Tokenizer.from_params(params.pop('claim_tokenizer', {}))
    wiki_tokenizer = Tokenizer.from_params(params.pop('wiki_tokenizer', {}))
    token_indexers = TokenIndexer.dict_from_params(params.pop('token_indexers', {}))
    db = FeverDocDB(params.pop("db_path", "data/fever/fever.db"))
    params.assert_empty(cls.__name__)
    return FEVERSentenceReader(db=db,
                               claim_tokenizer=claim_tokenizer,
                               wiki_tokenizer=wiki_tokenizer,
                               token_indexers=token_indexers)
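A hypothetical configuration block matching the keys popped in Example 1 is sketched below; the registered names "word" and "single_id" are assumptions based on pre-1.0 AllenNLP and are not taken from the FEVER baseline repository itself:

from allennlp.common import Params
from allennlp.data.tokenizers import Tokenizer
from allennlp.data.token_indexers import TokenIndexer

# Hypothetical config mirroring claim_tokenizer / wiki_tokenizer /
# token_indexers / db_path from Example 1.
params = Params({
    "claim_tokenizer": {"type": "word"},
    "wiki_tokenizer": {"type": "word"},
    "token_indexers": {"tokens": {"type": "single_id"}},
    "db_path": "data/fever/fever.db",
})
claim_tokenizer = Tokenizer.from_params(params.pop("claim_tokenizer", {}))
wiki_tokenizer = Tokenizer.from_params(params.pop("wiki_tokenizer", {}))
token_indexers = TokenIndexer.dict_from_params(params.pop("token_indexers", {}))
db_path = params.pop("db_path", "data/fever/fever.db")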
Example 2: from_params
# Required module import: from allennlp.data.tokenizers import Tokenizer [as alias]
# Or: from allennlp.data.tokenizers.Tokenizer import from_params [as alias]
@classmethod
def from_params(cls, params: Params) -> 'FEVERReader':
    claim_tokenizer = Tokenizer.from_params(params.pop('claim_tokenizer', {}))
    wiki_tokenizer = Tokenizer.from_params(params.pop('wiki_tokenizer', {}))
    token_indexers = TokenIndexer.dict_from_params(params.pop('token_indexers', {}))
    sentence_level = params.pop("sentence_level", False)
    db = FeverDocDB(params.pop("db_path", "data/fever.db"))
    params.assert_empty(cls.__name__)
    return FEVERReader(db=db,
                       sentence_level=sentence_level,
                       claim_tokenizer=claim_tokenizer,
                       wiki_tokenizer=wiki_tokenizer,
                       token_indexers=token_indexers)
Example 3: from_params
# Required module import: from allennlp.data.tokenizers import Tokenizer [as alias]
# Or: from allennlp.data.tokenizers.Tokenizer import from_params [as alias]
@classmethod
def from_params(cls, params: Params) -> 'ArcMultiChoiceJsonReader':
    tokenizer = Tokenizer.from_params(params.pop('tokenizer', {}))
    token_indexers = token_indexer_dict_from_params(params.pop('token_indexers', {}))
    choice_value_type = params.get('choice_value_type', None)
    question_value_type = params.get('question_value_type', None)
    lazy = params.pop('lazy', False)
    return ArcMultiChoiceJsonReader(tokenizer=tokenizer,
                                    token_indexers=token_indexers,
                                    choice_value_type=choice_value_type,
                                    question_value_type=question_value_type,
                                    lazy=lazy)
Example 4: from_params
# Required module import: from allennlp.data.tokenizers import Tokenizer [as alias]
# Or: from allennlp.data.tokenizers.Tokenizer import from_params [as alias]
@classmethod
def from_params(cls, params: Params) -> 'SimpleOverlapReader':
    tokenizer = Tokenizer.from_params(params.pop('tokenizer', {}))
    params.assert_empty(cls.__name__)
    return SimpleOverlapReader(tokenizer=tokenizer)
Example 5: from_params
# Required module import: from allennlp.data.tokenizers import Tokenizer [as alias]
# Or: from allennlp.data.tokenizers.Tokenizer import from_params [as alias]
@classmethod
def from_params(cls, params: Params) -> 'EntailmentTupleReader':
    tokenizer = Tokenizer.from_params(params.pop('tokenizer', {}))
    token_indexers = TokenIndexer.dict_from_params(params.pop('token_indexers', {}))
    max_tuples = params.pop('max_tuples', 30)
    max_tokens = params.pop('max_tokens', 200)
    params.assert_empty(cls.__name__)
    return EntailmentTupleReader(max_tokens=max_tokens,
                                 max_tuples=max_tuples,
                                 tokenizer=tokenizer,
                                 token_indexers=token_indexers)
Example 6: from_params
# Required module import: from allennlp.data.tokenizers import Tokenizer [as alias]
# Or: from allennlp.data.tokenizers.Tokenizer import from_params [as alias]
@classmethod
def from_params(cls, params: Params) -> 'AclSectionTitleDatasetReader':
    lazy = params.pop('lazy', False)
    tokenizer = Tokenizer.from_params(params.pop('tokenizer', {}))
    with_elmo = params.pop_bool("with_elmo", False)
    params.assert_empty(cls.__name__)
    return cls(lazy=lazy, tokenizer=tokenizer,
               with_elmo=with_elmo)
Example 7: from_params
# Required module import: from allennlp.data.tokenizers import Tokenizer [as alias]
# Or: from allennlp.data.tokenizers.Tokenizer import from_params [as alias]
@classmethod
def from_params(cls, params: Params) -> 'SciciteDatasetReader':
    lazy = params.pop('lazy', False)
    tokenizer = Tokenizer.from_params(params.pop('tokenizer', {}))
    use_lexicon_features = params.pop_bool("use_lexicon_features", False)
    use_sparse_lexicon_features = params.pop_bool("use_sparse_lexicon_features", False)
    multilabel = params.pop_bool("multilabel")
    with_elmo = params.pop_bool("with_elmo", False)
    reader_format = params.pop("reader_format", 'flat')
    params.assert_empty(cls.__name__)
    return cls(lazy=lazy, tokenizer=tokenizer,
               use_lexicon_features=use_lexicon_features,
               use_sparse_lexicon_features=use_sparse_lexicon_features,
               multilabel=multilabel,
               with_elmo=with_elmo,
               reader_format=reader_format)
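Note the two flavours of pop_bool in Example 7: with_elmo gets an explicit default, while multilabel does not, so omitting it from the config raises a ConfigurationError instead of silently falling back (this describes the pre-1.0 Params API as I recall it; treat the exact exception as an assumption). A small sketch:

from allennlp.common import Params
from allennlp.common.checks import ConfigurationError

params = Params({"with_elmo": True})               # deliberately no "multilabel" key
with_elmo = params.pop_bool("with_elmo", False)    # missing key would fall back to False
try:
    multilabel = params.pop_bool("multilabel")     # no default: a missing key is an error
except ConfigurationError as err:
    print("required key missing:", err)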
Example 8: from_params
# Required module import: from allennlp.data.tokenizers import Tokenizer [as alias]
# Or: from allennlp.data.tokenizers.Tokenizer import from_params [as alias]
@classmethod
def from_params(cls, params: Params) -> 'AclarcDatasetReader':
    lazy = params.pop('lazy', False)
    tokenizer = Tokenizer.from_params(params.pop('tokenizer', {}))
    use_lexicon_features = params.pop_bool("use_lexicon_features", False)
    use_sparse_lexicon_features = params.pop_bool("use_sparse_lexicon_features", False)
    with_elmo = params.pop_bool("with_elmo", False)
    params.assert_empty(cls.__name__)
    return cls(lazy=lazy, tokenizer=tokenizer,
               use_lexicon_features=use_lexicon_features,
               use_sparse_lexicon_features=use_sparse_lexicon_features,
               with_elmo=with_elmo)
Example 9: from_params
# Required module import: from allennlp.data.tokenizers import Tokenizer [as alias]
# Or: from allennlp.data.tokenizers.Tokenizer import from_params [as alias]
@classmethod
def from_params(cls, params: Params) -> 'SwagReader':
    tokenizer = Tokenizer.from_params(params.pop('tokenizer', {}))
    token_indexers = TokenIndexer.dict_from_params(params.pop('token_indexers', {}))
    use_only_gold_examples = params.pop('use_only_gold_examples', False)
    params.assert_empty(cls.__name__)
    return cls(tokenizer=tokenizer,
               token_indexers=token_indexers,
               use_only_gold_examples=use_only_gold_examples)
Example 10: from_params
# Required module import: from allennlp.data.tokenizers import Tokenizer [as alias]
# Or: from allennlp.data.tokenizers.Tokenizer import from_params [as alias]
@classmethod
def from_params(cls, params: Params) -> 'SwagReader':
    tokenizer = Tokenizer.from_params(params.pop('tokenizer', {}))
    token_indexers = TokenIndexer.dict_from_params(params.pop('token_indexers', {}))
    use_only_gold_examples = params.pop('use_only_gold_examples', False)
    only_end = params.pop('only_end', False)
    params.assert_empty(cls.__name__)
    return cls(tokenizer=tokenizer,
               token_indexers=token_indexers,
               use_only_gold_examples=use_only_gold_examples,
               only_end=only_end)
Example 11: from_params
# Required module import: from allennlp.data.tokenizers import Tokenizer [as alias]
# Or: from allennlp.data.tokenizers.Tokenizer import from_params [as alias]
@classmethod
def from_params(cls, params: Params) -> 'ArcMultiChoiceJsonReader':
    tokenizer = Tokenizer.from_params(params.pop('tokenizer', {}))
    token_indexers = TokenIndexer.dict_from_params(params.pop('token_indexers', {}))
    return ArcMultiChoiceJsonReader(tokenizer=tokenizer,
                                    token_indexers=token_indexers)
Example 12: text_to_instance
# Required module import: from allennlp.data.tokenizers import Tokenizer [as alias]
# Or: from allennlp.data.tokenizers.Tokenizer import from_params [as alias]
def text_to_instance(self,  # type: ignore
                     question_text: str,
                     passage_text: str,
                     question_id: str,
                     char_spans: List[Tuple[int, int]] = None,
                     answer_texts: List[str] = None,
                     passage_tokens: List[Token] = None) -> Instance:
    # pylint: disable=arguments-differ
    if not passage_tokens:
        passage_tokens = self._tokenizer.tokenize(passage_text)
    char_spans = char_spans or []
    # We need to convert character indices in `passage_text` to token indices in
    # `passage_tokens`, as the latter is what we'll actually use for supervision.
    token_spans: List[Tuple[int, int]] = []
    passage_offsets = [(token.idx, token.idx + len(token.text)) for token in passage_tokens]
    for char_span_start, char_span_end in char_spans:
        (span_start, span_end), error = util.char_span_to_token_span(passage_offsets,
                                                                     (char_span_start, char_span_end))
        if error:
            logger.debug("Passage: %s", passage_text)
            logger.debug("Passage tokens: %s", passage_tokens)
            logger.debug("Question text: %s", question_text)
            logger.debug("Answer span: (%d, %d)", char_span_start, char_span_end)
            logger.debug("Token span: (%d, %d)", span_start, span_end)
            logger.debug("Tokens in answer: %s", passage_tokens[span_start:span_end + 1])
            logger.debug("Answer: %s", passage_text[char_span_start:char_span_end])
        token_spans.append((span_start, span_end))
    return make_reading_comprehension_instance(
        self._tokenizer.tokenize(question_text),
        passage_tokens,
        self._token_indexers,
        passage_text,
        question_id,
        token_spans,
        answer_texts)
# @classmethod
# def from_params(cls, params: Params) -> 'Squad2Reader':
# tokenizer = Tokenizer.from_params(params.pop('tokenizer', {}))
# token_indexers = TokenIndexer.dict_from_params(params.pop('token_indexers', {}))
# lazy = params.pop('lazy', False)
# params.assert_empty(cls.__name__)
# return cls(tokenizer=tokenizer, token_indexers=token_indexers, lazy=lazy)
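The heart of Example 12 is mapping character-level answer spans onto token indices with util.char_span_to_token_span, which takes the per-token character offsets and a character span and returns a token span together with an error flag when the span does not line up with token boundaries. A standalone sketch of that step, assuming the pre-1.0 module path allennlp.data.dataset_readers.reading_comprehension.util:

from allennlp.common import Params
from allennlp.data.tokenizers import Tokenizer
from allennlp.data.dataset_readers.reading_comprehension import util

tokenizer = Tokenizer.from_params(Params({"type": "word"}))
passage_text = "The quick brown fox jumps over the lazy dog."
passage_tokens = tokenizer.tokenize(passage_text)

# Character offsets of every token, exactly as computed in Example 12.
passage_offsets = [(t.idx, t.idx + len(t.text)) for t in passage_tokens]

# Character span of the answer "brown fox", mapped to token indices.
char_start = passage_text.index("brown")
char_end = char_start + len("brown fox")
(span_start, span_end), error = util.char_span_to_token_span(
    passage_offsets, (char_start, char_end))
print(span_start, span_end, error)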