

Python token_indexer.TokenIndexer Code Examples

This article compiles typical usage examples of the Python class allennlp.data.token_indexers.token_indexer.TokenIndexer. If you are unsure what TokenIndexer is for or how to use it in practice, the curated examples below should help. You can also explore the surrounding module, allennlp.data.token_indexers.token_indexer, for related usage.


The following presents 10 code examples of token_indexer.TokenIndexer, ordered roughly by popularity.
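
Before looking at the examples, here is a minimal sketch of the Dict[str, TokenIndexer] pattern that every constructor in this article accepts (written for this article and assuming a recent allennlp release; the "tokens" and "token_characters" keys are conventional names, not requirements):

# The same dictionary can be passed as source_token_indexers / target_token_indexers
# in the reader constructors below.
from typing import Dict

from allennlp.data.token_indexers import (
    SingleIdTokenIndexer,
    TokenCharactersIndexer,
    TokenIndexer,
)

token_indexers: Dict[str, TokenIndexer] = {
    "tokens": SingleIdTokenIndexer(namespace="tokens"),
    "token_characters": TokenCharactersIndexer(min_padding_length=3),
}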

Example 1: __init__

# Module to import: from allennlp.data.token_indexers import token_indexer [as alias]
# Alternatively: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(self,
                 tokenizer: Tokenizer,
                 source_token_indexers: Dict[str, TokenIndexer] = None,
                 target_token_indexers: Dict[str, TokenIndexer] = None,
                 source_max_tokens: int = 400,
                 target_max_tokens: int = 100,
                 separate_namespaces: bool = False,
                 target_namespace: str = "target_tokens",
                 save_copy_fields: bool = False,
                 save_pgn_fields: bool = False,
                 lowercase: bool = True,
                 lazy: bool = True) -> None:
        super().__init__(
            tokenizer=tokenizer,
            source_token_indexers=source_token_indexers,
            target_token_indexers=target_token_indexers,
            source_max_tokens=source_max_tokens,
            target_max_tokens=target_max_tokens,
            separate_namespaces=separate_namespaces,
            target_namespace=target_namespace,
            save_copy_fields=save_copy_fields,
            save_pgn_fields=save_pgn_fields,
            lowercase=lowercase,
            lazy=lazy
        ) 
Developer: IlyaGusev, Project: summarus, Lines: 27, Source: cnn_dailymail_json_reader.py

Example 2: __init__

# Module to import: from allennlp.data.token_indexers import token_indexer [as alias]
# Alternatively: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(self,
                 tokenizer: Tokenizer,
                 source_token_indexers: Dict[str, TokenIndexer] = None,
                 target_token_indexers: Dict[str, TokenIndexer] = None,
                 source_max_tokens: int = 400,
                 target_max_tokens: int = 100,
                 separate_namespaces: bool = False,
                 target_namespace: str = "target_tokens",
                 save_copy_fields: bool = False,
                 save_pgn_fields: bool = False,
                 lowercase: bool = False,
                 lazy: bool = True) -> None:
        super().__init__(
            tokenizer=tokenizer,
            source_token_indexers=source_token_indexers,
            target_token_indexers=target_token_indexers,
            source_max_tokens=source_max_tokens,
            target_max_tokens=target_max_tokens,
            separate_namespaces=separate_namespaces,
            target_namespace=target_namespace,
            save_copy_fields=save_copy_fields,
            save_pgn_fields=save_pgn_fields,
            lowercase=lowercase,
            lazy=lazy
        ) 
Developer: IlyaGusev, Project: summarus, Lines: 27, Source: ria_reader.py

Example 3: __init__

# Module to import: from allennlp.data.token_indexers import token_indexer [as alias]
# Alternatively: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(self,
                 tokenizer: Tokenizer = None,
                 source_token_indexers: Dict[str, TokenIndexer] = None,
                 max_sentences_count: int = 30,
                 sentence_max_tokens: int = 100,
                 lowercase: bool = True,
                 lazy: bool = True) -> None:
        super().__init__(
            tokenizer=tokenizer,
            source_token_indexers=source_token_indexers,
            max_sentences_count=max_sentences_count,
            sentence_max_tokens=sentence_max_tokens,
            lowercase=lowercase,
            language="ru",
            lazy=lazy
        ) 
Developer: IlyaGusev, Project: summarus, Lines: 18, Source: gazeta_sentence_tagger_reader.py

Example 4: __init__

# Module to import: from allennlp.data.token_indexers import token_indexer [as alias]
# Alternatively: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(self,
                 tokenizer: Tokenizer = None,
                 source_token_indexers: Dict[str, TokenIndexer] = None,
                 target_token_indexers: Dict[str, TokenIndexer] = None,
                 source_max_tokens: int = 400,
                 target_max_tokens: int = 100,
                 target_namespace: str = None,
                 separate_namespaces: bool = False,
                 save_copy_fields: bool = False,
                 save_pgn_fields: bool = False) -> None:
        super().__init__(
            tokenizer=tokenizer,
            source_token_indexers=source_token_indexers,
            target_token_indexers=target_token_indexers,
            source_max_tokens=source_max_tokens,
            target_max_tokens=target_max_tokens,
            target_namespace=target_namespace,
            separate_namespaces=separate_namespaces,
            save_copy_fields=save_copy_fields,
            save_pgn_fields=save_pgn_fields
        ) 
Developer: IlyaGusev, Project: summarus, Lines: 23, Source: contracts_reader.py

Example 5: __init__

# Module to import: from allennlp.data.token_indexers import token_indexer [as alias]
# Alternatively: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(self,
                 tokenizer: Tokenizer = None,
                 source_token_indexers: Dict[str, TokenIndexer] = None,
                 target_token_indexers: Dict[str, TokenIndexer] = None,
                 source_max_tokens: int = 800,
                 target_max_tokens: int = 200,
                 separate_namespaces: bool = False,
                 target_namespace: str = "target_tokens",
                 save_copy_fields: bool = False,
                 save_pgn_fields: bool = False,
                 lowercase: bool = False,
                 lazy: bool = True) -> None:
        super().__init__(
            tokenizer=tokenizer,
            source_token_indexers=source_token_indexers,
            target_token_indexers=target_token_indexers,
            source_max_tokens=source_max_tokens,
            target_max_tokens=target_max_tokens,
            separate_namespaces=separate_namespaces,
            target_namespace=target_namespace,
            save_copy_fields=save_copy_fields,
            save_pgn_fields=save_pgn_fields,
            lowercase=lowercase,
            lazy=lazy
        ) 
Developer: IlyaGusev, Project: summarus, Lines: 27, Source: gazeta_reader.py

Example 6: __init__

# Module to import: from allennlp.data.token_indexers import token_indexer [as alias]
# Alternatively: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(self, tokens: List[Token], token_indexers: Dict[str, TokenIndexer]) -> None:
        self.tokens = tokens
        self._token_indexers = token_indexers
        self._indexed_tokens: Optional[Dict[str, IndexedTokenList]] = None

        if not all(isinstance(x, (Token, SpacyToken)) for x in tokens):
            raise ConfigurationError(
                "TextFields must be passed Tokens. "
                "Found: {} with types {}.".format(tokens, [type(x) for x in tokens])
            ) 
Developer: allenai, Project: allennlp, Lines: 12, Source: text_field.py
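
For context, a short usage sketch (my own addition, assuming the allennlp 1.x API used in the snippet above) of how a TextField built this way is indexed against a vocabulary and converted to tensors:

from allennlp.data import Instance, Token, Vocabulary
from allennlp.data.fields import TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

tokens = [Token(t) for t in "The quick brown fox".split()]
field = TextField(tokens, {"tokens": SingleIdTokenIndexer()})

# Build a vocabulary from an Instance containing the field, then index the field.
vocab = Vocabulary.from_instances([Instance({"sentence": field})])
field.index(vocab)

# Pad to the lengths reported by the indexers and convert to tensors.
tensors = field.as_tensor(field.get_padding_lengths())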

Example 7: as_tensor

# Module to import: from allennlp.data.token_indexers import token_indexer [as alias]
# Alternatively: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def as_tensor(self,
                  padding_lengths: Dict[str, int],
                  cuda_device: int = -1) -> Dict[str, torch.Tensor]:
        tensors = {}
        num_tokens = padding_lengths.get(u'num_tokens')
        for indexer_name, indexer in list(self._token_indexers.items()):
            if num_tokens is None:
                # The indexers return different lengths.
                # Get the desired_num_tokens for this indexer.
                desired_num_tokens = dict((
                        indexed_tokens_key, padding_lengths[indexed_tokens_key])
                        for indexed_tokens_key in self._indexer_name_to_indexed_token[indexer_name])
            else:
                desired_num_tokens = {indexer_name: num_tokens}

            indices_to_pad = dict((indexed_tokens_key, self._indexed_tokens[indexed_tokens_key])
                              for indexed_tokens_key in self._indexer_name_to_indexed_token[indexer_name])
            padded_array = indexer.pad_token_sequence(indices_to_pad,
                                                      desired_num_tokens, padding_lengths)
            # We use the key of the indexer to recognise what the tensor corresponds to within the
            # field (i.e. the result of word indexing, or the result of character indexing, for
            # example).
            # TODO(mattg): we might someday have a TokenIndexer that needs to use something other
            # than a LongTensor here, and it's not clear how to signal that.  Maybe we'll need to
            # add a class method to TokenIndexer to tell us the type?  But we can worry about that
            # when there's a compelling use case for it.
            indexer_tensors = dict((key, torch.LongTensor(array)) for key, array in list(padded_array.items()))
            if cuda_device > -1:
                for key in list(indexer_tensors.keys()):
                    indexer_tensors[key] = indexer_tensors[key].cuda(cuda_device)
            tensors.update(indexer_tensors)
        return tensors

    #overrides 
Developer: plasticityai, Project: magnitude, Lines: 36, Source: text_field.py

Example 8: __init__

# Module to import: from allennlp.data.token_indexers import token_indexer [as alias]
# Alternatively: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(
        self,
        lazy: bool = False,
        tables_directory: str = None,
        offline_logical_forms_directory: str = None,
        max_offline_logical_forms: int = 10,
        keep_if_no_logical_forms: bool = False,
        tokenizer: Tokenizer = None,
        question_token_indexers: Dict[str, TokenIndexer] = None,
        table_token_indexers: Dict[str, TokenIndexer] = None,
        use_table_for_vocab: bool = False,
        max_table_tokens: int = None,
        output_agendas: bool = False,
    ) -> None:
        super().__init__(lazy=lazy)
        self._tables_directory = tables_directory
        self._offline_logical_forms_directory = offline_logical_forms_directory
        self._max_offline_logical_forms = max_offline_logical_forms
        self._keep_if_no_logical_forms = keep_if_no_logical_forms
        self._tokenizer = tokenizer or SpacyTokenizer(pos_tags=True)
        self._question_token_indexers = question_token_indexers or {
            "tokens": SingleIdTokenIndexer()
        }
        self._table_token_indexers = table_token_indexers or self._question_token_indexers
        self._use_table_for_vocab = use_table_for_vocab
        self._max_table_tokens = max_table_tokens
        self._output_agendas = output_agendas 
Developer: allenai, Project: allennlp-semparse, Lines: 29, Source: wikitables.py

Example 9: __init__

# Module to import: from allennlp.data.token_indexers import token_indexer [as alias]
# Alternatively: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(self,
                 tokenizer: Tokenizer,
                 cnn_tokenized_dir: str=None,
                 dm_tokenized_dir: str=None,
                 source_token_indexers: Dict[str, TokenIndexer] = None,
                 target_token_indexers: Dict[str, TokenIndexer] = None,
                 source_max_tokens: int = 400,
                 target_max_tokens: int = 100,
                 separate_namespaces: bool = False,
                 target_namespace: str = "target_tokens",
                 save_copy_fields: bool = False,
                 save_pgn_fields: bool = False,
                 lowercase: bool = True,
                 lazy: bool = True) -> None:
        super().__init__(
            tokenizer=tokenizer,
            source_token_indexers=source_token_indexers,
            target_token_indexers=target_token_indexers,
            source_max_tokens=source_max_tokens,
            target_max_tokens=target_max_tokens,
            separate_namespaces=separate_namespaces,
            target_namespace=target_namespace,
            save_copy_fields=save_copy_fields,
            save_pgn_fields=save_pgn_fields,
            lowercase=lowercase,
            lazy=lazy
        )

        self._cnn_tokenized_dir = cnn_tokenized_dir
        self._dm_tokenized_dir = dm_tokenized_dir 
Developer: IlyaGusev, Project: summarus, Lines: 32, Source: cnn_dailymail_reader.py

Example 10: __init__

# Module to import: from allennlp.data.token_indexers import token_indexer [as alias]
# Alternatively: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(self,
                 tokenizer: Tokenizer,
                 source_token_indexers: Dict[str, TokenIndexer] = None,
                 target_token_indexers: Dict[str, TokenIndexer] = None,
                 source_max_tokens: int = 400,
                 target_max_tokens: int = 100,
                 separate_namespaces: bool = False,
                 target_namespace: str = "target_tokens",
                 save_copy_fields: bool = False,
                 save_pgn_fields: bool = False,
                 lowercase: bool = True,
                 lazy: bool = True) -> None:
        super().__init__(lazy=lazy)

        assert save_pgn_fields or save_copy_fields or (not save_pgn_fields and not save_copy_fields)

        self._lowercase = lowercase
        self._source_max_tokens = source_max_tokens
        self._target_max_tokens = target_max_tokens

        self._tokenizer = tokenizer

        tokens_indexer = {"tokens": SingleIdTokenIndexer()}
        self._source_token_indexers = source_token_indexers or tokens_indexer
        self._target_token_indexers = target_token_indexers or tokens_indexer

        self._save_copy_fields = save_copy_fields
        self._save_pgn_fields = save_pgn_fields
        self._target_namespace = "tokens"
        if separate_namespaces:
            self._target_namespace = target_namespace
            second_tokens_indexer = {"tokens": SingleIdTokenIndexer(namespace=target_namespace)}
            self._target_token_indexers = target_token_indexers or second_tokens_indexer 
Developer: IlyaGusev, Project: summarus, Lines: 35, Source: summarization_reader.py
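
As a small illustration (hypothetical values, written for this article) of what the namespace logic above produces: with separate_namespaces left at False, source and target share one indexer and the default "tokens" vocabulary namespace; with separate_namespaces=True and no explicit target_token_indexers, the target side gets its own indexer writing to target_namespace, so the two vocabularies are built independently:

from allennlp.data.token_indexers import SingleIdTokenIndexer

# separate_namespaces=False: source and target share one indexer ("tokens" namespace).
shared_indexers = {"tokens": SingleIdTokenIndexer()}

# separate_namespaces=True: the target indexer writes to its own vocabulary namespace.
target_indexers = {"tokens": SingleIdTokenIndexer(namespace="target_tokens")}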


Note: The allennlp.data.token_indexers.token_indexer.TokenIndexer examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are drawn from open-source projects contributed by their respective authors; copyright of the source code belongs to the original authors, and any distribution or use should follow each project's license. Do not reproduce without permission.