

Python file_utils.cached_path method code examples

This article collects typical usage examples of the Python method allennlp.common.file_utils.cached_path. If you are wondering what file_utils.cached_path does, how to use it, or what real usage looks like, the curated code examples below may help. You can also explore other usage examples from the allennlp.common.file_utils module.


The following shows 15 code examples of file_utils.cached_path, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
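Before diving into the examples, here is a minimal sketch of the pattern they all share: cached_path accepts either a local file path or a URL; for a URL it downloads the resource into a local cache and returns the path of the cached copy, which can then be opened like any ordinary file. The URL below is a hypothetical placeholder used only for illustration.

from allennlp.common.file_utils import cached_path

# Hypothetical URL, used only for illustration; a plain local path works the same way.
DATA_URL = "https://example.com/data/train.jsonl"

# For a URL, cached_path downloads the file once into the local cache and returns
# the cached location; for an existing local path, it simply returns the path itself.
local_path = cached_path(DATA_URL)

with open(local_path, "r") as data_file:
    for line in data_file:
        print(line.strip())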

Example 1: _read

# Required import: from allennlp.common import file_utils [as alias]
# Alternatively: from allennlp.common.file_utils import cached_path [as alias]
def _read(self, file_path: str):
        # if `file_path` is a URL, redirect to the cache
        file_path = cached_path(file_path)

        with open(file_path, 'r') as semeval_file:
            logger.info("Reading SemEval 2010 Task 8 instances from jsonl dataset at: %s", file_path)
            for line in semeval_file:
                example = json.loads(line)

                tokens = example["tokens"]
                label = example["label"]
                entity_indices = example["entities"]
                
                start_e1, end_e1 = entity_indices[0]
                start_e2, end_e2 = entity_indices[1]
                entity_1 = (start_e1, end_e1 - 1)
                entity_2 = (start_e2, end_e2 - 1)

                yield self.text_to_instance(tokens, entity_1, entity_2, label) 
Developer: DFKI-NLP, Project: DISTRE, Lines: 21, Source file: semeval_2010_task_8_reader.py

Example 2: _read

# Required import: from allennlp.common import file_utils [as alias]
# Alternatively: from allennlp.common.file_utils import cached_path [as alias]
def _read(self, file_path: str) -> Iterable[Instance]:
        # if `file_path` is a URL, redirect to the cache
        file_path = cached_path(file_path)

        with open(file_path, "r") as data_file:
            logger.info("Reading instances from lines in file at: %s", file_path)

            # Group into alternative divider / sentence chunks.
            for is_divider, lines in itertools.groupby(data_file, _is_divider):
                # Ignore the divider chunks, so that `lines` corresponds to the words
                # of a single sentence.
                if not is_divider:
                    fields = [line.strip().split() for line in lines]
                    # unzipping trick returns tuples, but our Fields need lists
                    fields = [list(field) for field in zip(*fields)]
                    tokens_, _, _, pico_tags = fields
                    # TextField requires ``Token`` objects
                    tokens = [Token(token) for token in tokens_]

                    yield self.text_to_instance(tokens, pico_tags) 
Developer: allenai, Project: scibert, Lines: 22, Source file: ebmnlp.py

Example 3: __init__

# Required import: from allennlp.common import file_utils [as alias]
# Alternatively: from allennlp.common.file_utils import cached_path [as alias]
def __init__(self, options_file: str, weight_file: str, requires_grad: bool = False) -> None:
        super().__init__()

        with open(cached_path(options_file), "r") as fin:
            self._options = json.load(fin)
        self._weight_file = weight_file

        self.output_dim = self._options["lstm"]["projection_dim"]
        self.requires_grad = requires_grad

        self._load_weights()

        # Cache the arrays for use in forward -- +1 due to masking.
        self._beginning_of_sentence_characters = torch.from_numpy(
            numpy.array(ELMoCharacterMapper.beginning_of_sentence_characters) + 1
        )
        self._end_of_sentence_characters = torch.from_numpy(
            numpy.array(ELMoCharacterMapper.end_of_sentence_characters) + 1
        ) 
Developer: allenai, Project: allennlp, Lines: 21, Source file: elmo.py

Example 4: _read

# Required import: from allennlp.common import file_utils [as alias]
# Alternatively: from allennlp.common.file_utils import cached_path [as alias]
def _read(self, file_path: str) -> Iterable[Instance]:
        # if `file_path` is a URL, redirect to the cache
        file_path = cached_path(file_path)

        with open(file_path, "r") as data_file:
            logger.info("Reading instances from lines in file at: %s", file_path)

            # Group into alternative divider / sentence chunks.
            for is_divider, lines in itertools.groupby(data_file, _is_divider):
                # Ignore the divider chunks, so that `lines` corresponds to the words
                # of a single sentence.
                if not is_divider:
                    fields = [line.strip().split() for line in lines]
                    # unzipping trick returns tuples, but our Fields need lists
                    fields = [list(field) for field in zip(*fields)]
                    tokens_, pos_tags, chunk_tags, ner_tags = fields
                    # TextField requires `Token` objects
                    tokens = [Token(token) for token in tokens_]

                    yield self.text_to_instance(tokens, pos_tags, chunk_tags, ner_tags) 
Developer: allenai, Project: allennlp, Lines: 22, Source file: conll2003.py

Example 5: _read

# Required import: from allennlp.common import file_utils [as alias]
# Alternatively: from allennlp.common.file_utils import cached_path [as alias]
def _read(self, file_path):
        with open(cached_path(file_path), "r") as data_file:
            for line in data_file.readlines():
                if not line:
                    continue
                items = json.loads(line)
                text = items["text"]
                label = items.get("label")
                if label is not None:
                    if self._skip_label_indexing:
                        try:
                            label = int(label)
                        except ValueError:
                            raise ValueError(
                                "Labels must be integers if skip_label_indexing is True."
                            )
                    else:
                        label = str(label)
                instance = self.text_to_instance(text=text, label=label)
                if instance is not None:
                    yield instance 
Developer: allenai, Project: allennlp, Lines: 23, Source file: text_classification_json.py

Example 6: read

# Required import: from allennlp.common import file_utils [as alias]
# Alternatively: from allennlp.common.file_utils import cached_path [as alias]
def read(self, file_path: str):
        # if `file_path` is a URL, redirect to the cache
        file_path = cached_path(file_path)
        instances = []
        with open(file_path, 'r') as entailment_file:
            logger.info("Reading entailment instances from TSV dataset at: %s", file_path)
            for line in tqdm.tqdm(entailment_file):
                fields = line.split("\t")
                if len(fields) != 4:
                    raise ValueError("Expected four fields: "
                                     "premise   hypothesis  label   hypothesis_structure. "
                                     "Found {} fields in {}".format(len(fields), line))
                premise, hypothesis, label, hypothesis_structure = fields
                instances.append(self.text_to_instance(premise, hypothesis, hypothesis_structure,
                                                       label))
        if not instances:
            raise ConfigurationError("No instances were read from the given filepath {}. "
                                     "Is the path correct?".format(file_path))
        return Dataset(instances) 
Developer: allenai, Project: scitail, Lines: 21, Source file: entailment_tuple_reader.py

Example 7: __init__

# Required import: from allennlp.common import file_utils [as alias]
# Alternatively: from allennlp.common.file_utils import cached_path [as alias]
def __init__(self,
                 options_file,
                 weight_file,
                 requires_grad=False):
        super(_ElmoCharacterEncoder, self).__init__()

        with open(cached_path(options_file), u'r') as fin:
            self._options = json.load(fin)
        self._weight_file = weight_file

        self.output_dim = self._options[u'lstm'][u'projection_dim']
        self.requires_grad = requires_grad

        self._load_weights()

        # Cache the arrays for use in forward -- +1 due to masking.
        self._beginning_of_sentence_characters = torch.from_numpy(
                numpy.array(ELMoCharacterMapper.beginning_of_sentence_characters) + 1
        )
        self._end_of_sentence_characters = torch.from_numpy(
                numpy.array(ELMoCharacterMapper.end_of_sentence_characters) + 1
        ) 
Developer: plasticityai, Project: magnitude, Lines: 24, Source file: elmo.py

Example 8: _read

# Required import: from allennlp.common import file_utils [as alias]
# Alternatively: from allennlp.common.file_utils import cached_path [as alias]
def _read(self, file_path):
        # if `file_path` is a URL, redirect to the cache
        file_path = cached_path(file_path)

        with open(file_path, u"r") as data_file:

            logger.info(u"Reading instances from lines in file at: %s", file_path)
            for line in data_file:
                line = line.strip(u"\n")

                # skip blank lines
                if not line:
                    continue

                tokens_and_tags = [pair.rsplit(self._word_tag_delimiter, 1)
                                   for pair in line.split(self._token_delimiter)]
                tokens = [Token(token) for token, tag in tokens_and_tags]
                tags = [tag for token, tag in tokens_and_tags]
                yield self.text_to_instance(tokens, tags) 
Developer: plasticityai, Project: magnitude, Lines: 21, Source file: sequence_tagging.py

Example 9: _read

# Required import: from allennlp.common import file_utils [as alias]
# Alternatively: from allennlp.common.file_utils import cached_path [as alias]
def _read(self, file_path):
        # if `file_path` is a URL, redirect to the cache
        file_path = cached_path(file_path)

        with open(file_path, u"r") as data_file:
            logger.info(u"Reading instances from lines in file at: %s", file_path)

            # Group into alternative divider / sentence chunks.
            for is_divider, lines in itertools.groupby(data_file, _is_divider):
                # Ignore the divider chunks, so that `lines` corresponds to the words
                # of a single sentence.
                if not is_divider:
                    fields = [line.strip().split() for line in lines]
                    # unzipping trick returns tuples, but our Fields need lists
                    tokens, pos_tags, chunk_tags, ner_tags = [list(field) for field in izip(*fields)]
                    # TextField requires ``Token`` objects
                    tokens = [Token(token) for token in tokens]

                    yield self.text_to_instance(tokens, pos_tags, chunk_tags, ner_tags) 
Developer: plasticityai, Project: magnitude, Lines: 21, Source file: conll2003.py

Example 10: _read

# Required import: from allennlp.common import file_utils [as alias]
# Alternatively: from allennlp.common.file_utils import cached_path [as alias]
def _read(self, file_path):
        # if `file_path` is a URL, redirect to the cache
        file_path = cached_path(file_path)
        directory, filename = os.path.split(file_path)
        logger.info(u"Reading instances from lines in file at: %s", file_path)
        for parse in BracketParseCorpusReader(root=directory, fileids=[filename]).parsed_sents():

            self._strip_functional_tags(parse)
            # This is un-needed and clutters the label space.
            # All the trees also contain a root S node.
            if parse.label() == u"VROOT":
                parse = parse[0]
            pos_tags = [x[1] for x in parse.pos()] if self._use_pos_tags else None
            yield self.text_to_instance(parse.leaves(), pos_tags, parse)

    #overrides 
Developer: plasticityai, Project: magnitude, Lines: 18, Source file: penn_tree_bank.py

Example 11: _read

# Required import: from allennlp.common import file_utils [as alias]
# Alternatively: from allennlp.common.file_utils import cached_path [as alias]
def _read(self, file_path):
        with open(cached_path(file_path), u"r") as data_file:
            logger.info(u"Reading instances from lines in file at: %s", file_path)
            for line in data_file.readlines():
                line = line.strip(u"\n")
                if not line:
                    continue
                parsed_line = Tree.fromstring(line)
                if self._use_subtrees:
                    for subtree in parsed_line.subtrees():
                        instance = self.text_to_instance(subtree.leaves(), subtree.label())
                        if instance is not None:
                            yield instance
                else:
                    instance = self.text_to_instance(parsed_line.leaves(), parsed_line.label())
                    if instance is not None:
                        yield instance

    #overrides 
Developer: plasticityai, Project: magnitude, Lines: 21, Source file: stanford_sentiment_tree_bank.py

Example 12: _read

# Required import: from allennlp.common import file_utils [as alias]
# Alternatively: from allennlp.common.file_utils import cached_path [as alias]
def _read(self, file_path):
        # if `file_path` is a URL, redirect to the cache
        file_path = cached_path(file_path)

        with open(file_path) as atis_file:
            logger.info(u"Reading ATIS instances from dataset at : %s", file_path)
            for line in _lazy_parse(atis_file.read()):
                utterances = []
                for current_interaction in line[u'interaction']:
                    if not current_interaction[u'utterance']:
                        continue
                    utterances.append(current_interaction[u'utterance'])
                    instance = self.text_to_instance(utterances, current_interaction[u'sql'])
                    if not instance:
                        continue
                    yield instance

    #overrides 
Developer: plasticityai, Project: magnitude, Lines: 20, Source file: atis.py

Example 13: _read

# Required import: from allennlp.common import file_utils [as alias]
# Alternatively: from allennlp.common.file_utils import cached_path [as alias]
def _read(self, file_path):
        # if `file_path` is a URL, redirect to the cache
        file_path = cached_path(file_path)
        ontonotes_reader = Ontonotes()
        logger.info(u"Reading SRL instances from dataset files at: %s", file_path)
        if self._domain_identifier is not None:
            logger.info(u"Filtering to only include file paths containing the %s domain", self._domain_identifier)

        for sentence in self._ontonotes_subset(ontonotes_reader, file_path, self._domain_identifier):
            tokens = [Token(t) for t in sentence.words]
            if not sentence.srl_frames:
                # Sentence contains no predicates.
                tags = [u"O" for _ in tokens]
                verb_label = [0 for _ in tokens]
                yield self.text_to_instance(tokens, verb_label, tags)
            else:
                for (_, tags) in sentence.srl_frames:
                    verb_indicator = [1 if label[-2:] == u"-V" else 0 for label in tags]
                    yield self.text_to_instance(tokens, verb_indicator, tags) 
Developer: plasticityai, Project: magnitude, Lines: 21, Source file: semantic_role_labeling.py

Example 14: _read

# Required import: from allennlp.common import file_utils [as alias]
# Alternatively: from allennlp.common.file_utils import cached_path [as alias]
def _read(self, file_path):
        # if `file_path` is a URL, redirect to the cache
        file_path = cached_path(file_path)

        ontonotes_reader = Ontonotes()
        for sentences in ontonotes_reader.dataset_document_iterator(file_path):
            clusters = collections.defaultdict(list)

            total_tokens = 0
            for sentence in sentences:
                for typed_span in sentence.coref_spans:
                    # Coref annotations are on a _per sentence_
                    # basis, so we need to adjust them to be relative
                    # to the length of the document.
                    span_id, (start, end) = typed_span
                    clusters[span_id].append((start + total_tokens,
                                              end + total_tokens))
                total_tokens += len(sentence.words)

            canonical_clusters = canonicalize_clusters(clusters)
            yield self.text_to_instance([s.words for s in sentences], canonical_clusters)

    #overrides 
Developer: plasticityai, Project: magnitude, Lines: 25, Source file: conll.py

Example 15: _read

# Required import: from allennlp.common import file_utils [as alias]
# Alternatively: from allennlp.common.file_utils import cached_path [as alias]
def _read(self, file_path):
        # if `file_path` is a URL, redirect to the cache
        file_path = cached_path(file_path)

        with open(file_path, u"r") as text_file:
            instance_strings = text_file.readlines()

        if self._tokens_per_instance is not None:
            all_text = u" ".join([x.replace(u"\n", u" ").strip() for x in instance_strings])
            tokenized_text = self._tokenizer.tokenize(all_text)
            num_tokens = self._tokens_per_instance + 1
            tokenized_strings = []
            logger.info(u"Creating dataset from all text in file: %s", file_path)
            for index in Tqdm.tqdm(range(0, len(tokenized_text) - num_tokens, num_tokens - 1)):
                tokenized_strings.append(tokenized_text[index:(index + num_tokens)])
        else:
            tokenized_strings = [self._tokenizer.tokenize(s) for s in instance_strings]

        for tokenized_string in tokenized_strings:
            input_field = TextField(tokenized_string[:-1], self._token_indexers)
            output_field = TextField(tokenized_string[1:], self._output_indexer)
            yield Instance({u'input_tokens': input_field,
                            u'output_tokens': output_field})

    #overrides 
Developer: plasticityai, Project: magnitude, Lines: 27, Source file: language_modeling.py


Note: the allennlp.common.file_utils.cached_path examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers, and copyright of the source code remains with the original authors. For distribution and use, please refer to the corresponding project's license; do not reproduce without permission.