本文整理汇总了Python中allennlp.common.file_utils.cached_path方法的典型用法代码示例。如果您正苦于以下问题:Python file_utils.cached_path方法的具体用法?Python file_utils.cached_path怎么用?Python file_utils.cached_path使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类allennlp.common.file_utils
的用法示例。
在下文中一共展示了file_utils.cached_path方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _read
# 需要导入模块: from allennlp.common import file_utils [as 别名]
# 或者: from allennlp.common.file_utils import cached_path [as 别名]
def _read(self, file_path: str):
    """Yield one instance per line of a SemEval 2010 Task 8 jsonl file."""
    # If ``file_path`` is a URL, redirect to the cache.
    file_path = cached_path(file_path)
    with open(file_path, 'r') as semeval_file:
        logger.info("Reading SemEval 2010 Task 8 instances from jsonl dataset at: %s", file_path)
        for raw_line in semeval_file:
            record = json.loads(raw_line)
            span_1, span_2 = record["entities"]
            # Entity spans are half-open [start, end); convert to inclusive end indices.
            first_entity = (span_1[0], span_1[1] - 1)
            second_entity = (span_2[0], span_2[1] - 1)
            yield self.text_to_instance(record["tokens"], first_entity, second_entity,
                                        record["label"])
示例2: _read
# 需要导入模块: from allennlp.common import file_utils [as 别名]
# 或者: from allennlp.common.file_utils import cached_path [as 别名]
def _read(self, file_path: str) -> Iterable[Instance]:
    """Read PICO-style CoNLL data, yielding one instance per sentence."""
    # If ``file_path`` is a URL, redirect to the cache.
    file_path = cached_path(file_path)
    with open(file_path, "r") as data_file:
        logger.info("Reading instances from lines in file at: %s", file_path)
        # ``groupby`` alternates between divider chunks and sentence chunks.
        for is_divider, chunk in itertools.groupby(data_file, _is_divider):
            # Divider chunks carry no tokens; only sentence chunks become instances.
            if is_divider:
                continue
            rows = [row.strip().split() for row in chunk]
            # Transpose rows into columns; ``zip`` yields tuples, Fields need lists.
            columns = [list(column) for column in zip(*rows)]
            words, _, _, pico_tags = columns
            # TextField requires ``Token`` objects.
            tokens = [Token(word) for word in words]
            yield self.text_to_instance(tokens, pico_tags)
示例3: __init__
# 需要导入模块: from allennlp.common import file_utils [as 别名]
# 或者: from allennlp.common.file_utils import cached_path [as 别名]
def __init__(self, options_file: str, weight_file: str, requires_grad: bool = False) -> None:
    """Load the ELMo character-encoder options (possibly from a URL) and weights.

    ``options_file`` is resolved through the download cache; ``weight_file`` is
    stored and loaded by ``self._load_weights()``.
    """
    super().__init__()
    with open(cached_path(options_file), "r") as fin:
        self._options = json.load(fin)
    self._weight_file = weight_file
    self.output_dim = self._options["lstm"]["projection_dim"]
    self.requires_grad = requires_grad
    self._load_weights()
    # Cache BOS/EOS character-id tensors for forward(); the +1 shifts every
    # id past the masking value.
    bos_ids = numpy.array(ELMoCharacterMapper.beginning_of_sentence_characters) + 1
    eos_ids = numpy.array(ELMoCharacterMapper.end_of_sentence_characters) + 1
    self._beginning_of_sentence_characters = torch.from_numpy(bos_ids)
    self._end_of_sentence_characters = torch.from_numpy(eos_ids)
示例4: _read
# 需要导入模块: from allennlp.common import file_utils [as 别名]
# 或者: from allennlp.common.file_utils import cached_path [as 别名]
def _read(self, file_path: str) -> Iterable[Instance]:
    """Read CoNLL-2003-format data, yielding one instance per sentence."""
    # If ``file_path`` is a URL, redirect to the cache.
    file_path = cached_path(file_path)
    with open(file_path, "r") as data_file:
        logger.info("Reading instances from lines in file at: %s", file_path)
        # ``groupby`` alternates between divider chunks and sentence chunks.
        for is_divider, chunk in itertools.groupby(data_file, _is_divider):
            # Only sentence chunks (non-dividers) become instances.
            if is_divider:
                continue
            rows = [row.strip().split() for row in chunk]
            # Transpose rows into per-column lists (``zip`` yields tuples).
            words, pos_tags, chunk_tags, ner_tags = [list(col) for col in zip(*rows)]
            # TextField requires ``Token`` objects.
            tokens = [Token(word) for word in words]
            yield self.text_to_instance(tokens, pos_tags, chunk_tags, ner_tags)
示例5: _read
# 需要导入模块: from allennlp.common import file_utils [as 别名]
# 或者: from allennlp.common.file_utils import cached_path [as 别名]
def _read(self, file_path):
    """Read a jsonl text-classification dataset, one ``{"text", "label"}`` object per line.

    Yields instances built by ``self.text_to_instance``; lines producing ``None``
    are dropped.  Raises ``ValueError`` if ``self._skip_label_indexing`` is set and
    a label is not an integer.
    """
    # If ``file_path`` is a URL, redirect to the cache.
    with open(cached_path(file_path), "r") as data_file:
        # Iterate lazily instead of materializing data_file.readlines().
        for line in data_file:
            line = line.strip()
            # Skip blank lines.  FIX: the original tested the raw line, which
            # still ends with "\n" and is therefore always truthy, so blank
            # lines crashed json.loads instead of being skipped.
            if not line:
                continue
            items = json.loads(line)
            text = items["text"]
            label = items.get("label")
            if label is not None:
                if self._skip_label_indexing:
                    try:
                        label = int(label)
                    except ValueError:
                        raise ValueError(
                            "Labels must be integers if skip_label_indexing is True."
                        )
                else:
                    label = str(label)
            instance = self.text_to_instance(text=text, label=label)
            if instance is not None:
                yield instance
示例6: read
# 需要导入模块: from allennlp.common import file_utils [as 别名]
# 或者: from allennlp.common.file_utils import cached_path [as 别名]
def read(self, file_path: str):
    """Read entailment examples from a 4-column TSV file into a ``Dataset``.

    Raises ``ValueError`` on malformed rows and ``ConfigurationError`` when the
    file yields no instances at all.
    """
    # If ``file_path`` is a URL, redirect to the cache.
    file_path = cached_path(file_path)
    instances = []
    with open(file_path, 'r') as entailment_file:
        logger.info("Reading entailment instances from TSV dataset at: %s", file_path)
        for line in tqdm.tqdm(entailment_file):
            columns = line.split("\t")
            if len(columns) != 4:
                raise ValueError("Expected four fields: "
                                 "premise hypothesis label hypothesis_structure. "
                                 "Found {} fields in {}".format(len(columns), line))
            premise, hypothesis, label, structure = columns
            instances.append(self.text_to_instance(premise, hypothesis, structure, label))
    if not instances:
        raise ConfigurationError("No instances were read from the given filepath {}. "
                                 "Is the path correct?".format(file_path))
    return Dataset(instances)
示例7: __init__
# 需要导入模块: from allennlp.common import file_utils [as 别名]
# 或者: from allennlp.common.file_utils import cached_path [as 别名]
def __init__(self,
             options_file,
             weight_file,
             requires_grad=False):
    """Load the ELMo character-encoder options and weights (Python-2 port)."""
    super(_ElmoCharacterEncoder, self).__init__()
    # ``options_file`` may be a URL; resolve it through the download cache.
    with open(cached_path(options_file), u'r') as fin:
        self._options = json.load(fin)
    self._weight_file = weight_file
    self.output_dim = self._options[u'lstm'][u'projection_dim']
    self.requires_grad = requires_grad
    self._load_weights()
    # Cache BOS/EOS character-id tensors for forward(); the +1 shifts every
    # id past the masking value.
    bos_ids = numpy.array(ELMoCharacterMapper.beginning_of_sentence_characters) + 1
    eos_ids = numpy.array(ELMoCharacterMapper.end_of_sentence_characters) + 1
    self._beginning_of_sentence_characters = torch.from_numpy(bos_ids)
    self._end_of_sentence_characters = torch.from_numpy(eos_ids)
示例8: _read
# 需要导入模块: from allennlp.common import file_utils [as 别名]
# 或者: from allennlp.common.file_utils import cached_path [as 别名]
def _read(self, file_path):
    """Read word{delim}tag sequences, one sentence per line, yielding instances."""
    # If ``file_path`` is a URL, redirect to the cache.
    file_path = cached_path(file_path)
    with open(file_path, u"r") as data_file:
        logger.info(u"Reading instances from lines in file at: %s", file_path)
        for raw_line in data_file:
            sentence = raw_line.strip(u"\n")
            # Skip blank lines.
            if not sentence:
                continue
            # rsplit(…, 1) keeps any delimiter characters inside the word intact.
            pairs = [piece.rsplit(self._word_tag_delimiter, 1)
                     for piece in sentence.split(self._token_delimiter)]
            tokens = [Token(word) for word, _ in pairs]
            tags = [tag for _, tag in pairs]
            yield self.text_to_instance(tokens, tags)
示例9: _read
# 需要导入模块: from allennlp.common import file_utils [as 别名]
# 或者: from allennlp.common.file_utils import cached_path [as 别名]
def _read(self, file_path):
    """Read CoNLL-format data (Python-2 port), yielding one instance per sentence."""
    # If ``file_path`` is a URL, redirect to the cache.
    file_path = cached_path(file_path)
    with open(file_path, u"r") as data_file:
        logger.info(u"Reading instances from lines in file at: %s", file_path)
        # ``groupby`` alternates between divider chunks and sentence chunks.
        for is_divider, chunk in itertools.groupby(data_file, _is_divider):
            # Only sentence chunks (non-dividers) become instances.
            if is_divider:
                continue
            rows = [row.strip().split() for row in chunk]
            # Transpose rows into columns (``izip`` yields tuples; Fields need lists).
            words, pos_tags, chunk_tags, ner_tags = [list(col) for col in izip(*rows)]
            # TextField requires ``Token`` objects.
            tokens = [Token(word) for word in words]
            yield self.text_to_instance(tokens, pos_tags, chunk_tags, ner_tags)
示例10: _read
# 需要导入模块: from allennlp.common import file_utils [as 别名]
# 或者: from allennlp.common.file_utils import cached_path [as 别名]
def _read(self, file_path):
    """Read bracketed parse trees, yielding one instance per sentence."""
    # If ``file_path`` is a URL, redirect to the cache.
    file_path = cached_path(file_path)
    directory, filename = os.path.split(file_path)
    logger.info(u"Reading instances from lines in file at: %s", file_path)
    corpus = BracketParseCorpusReader(root=directory, fileids=[filename])
    for parse in corpus.parsed_sents():
        self._strip_functional_tags(parse)
        # Every tree has a VROOT root node, which is un-needed and clutters
        # the label space; descend to its single child.
        if parse.label() == u"VROOT":
            parse = parse[0]
        pos_tags = [pair[1] for pair in parse.pos()] if self._use_pos_tags else None
        yield self.text_to_instance(parse.leaves(), pos_tags, parse)
#overrides
示例11: _read
# 需要导入模块: from allennlp.common import file_utils [as 别名]
# 或者: from allennlp.common.file_utils import cached_path [as 别名]
def _read(self, file_path):
    """Read PTB-style sentiment trees, optionally yielding every labeled subtree."""
    # If ``file_path`` is a URL, redirect to the cache.
    with open(cached_path(file_path), u"r") as data_file:
        logger.info(u"Reading instances from lines in file at: %s", file_path)
        for raw_line in data_file.readlines():
            stripped = raw_line.strip(u"\n")
            # Skip blank lines.
            if not stripped:
                continue
            tree = Tree.fromstring(stripped)
            if self._use_subtrees:
                # One instance per constituent, using the phrase-level labels.
                for subtree in tree.subtrees():
                    instance = self.text_to_instance(subtree.leaves(), subtree.label())
                    if instance is not None:
                        yield instance
            else:
                instance = self.text_to_instance(tree.leaves(), tree.label())
                if instance is not None:
                    yield instance
#overrides
示例12: _read
# 需要导入模块: from allennlp.common import file_utils [as 别名]
# 或者: from allennlp.common.file_utils import cached_path [as 别名]
def _read(self, file_path):
    """Read ATIS interactions, yielding an instance per non-empty utterance turn."""
    # If ``file_path`` is a URL, redirect to the cache.
    file_path = cached_path(file_path)
    with open(file_path) as atis_file:
        logger.info(u"Reading ATIS instances from dataset at : %s", file_path)
        for interaction in _lazy_parse(atis_file.read()):
            utterances = []
            for turn in interaction[u'interaction']:
                if not turn[u'utterance']:
                    continue
                # Each instance sees the full utterance history up to this turn.
                utterances.append(turn[u'utterance'])
                instance = self.text_to_instance(utterances, turn[u'sql'])
                if not instance:
                    continue
                yield instance
#overrides
示例13: _read
# 需要导入模块: from allennlp.common import file_utils [as 别名]
# 或者: from allennlp.common.file_utils import cached_path [as 别名]
def _read(self, file_path):
    """Read Ontonotes SRL data, yielding one instance per predicate frame."""
    # If ``file_path`` is a URL, redirect to the cache.
    file_path = cached_path(file_path)
    ontonotes_reader = Ontonotes()
    logger.info(u"Reading SRL instances from dataset files at: %s", file_path)
    if self._domain_identifier is not None:
        logger.info(u"Filtering to only include file paths containing the %s domain", self._domain_identifier)
    for sentence in self._ontonotes_subset(ontonotes_reader, file_path, self._domain_identifier):
        tokens = [Token(word) for word in sentence.words]
        if not sentence.srl_frames:
            # Sentence contains no predicates: emit one all-"O" instance
            # with no verb marked.
            empty_tags = [u"O" for _ in tokens]
            empty_indicator = [0 for _ in tokens]
            yield self.text_to_instance(tokens, empty_indicator, empty_tags)
        else:
            for _, frame_tags in sentence.srl_frames:
                # The predicate position is the token whose tag ends in "-V".
                indicator = [1 if tag[-2:] == u"-V" else 0 for tag in frame_tags]
                yield self.text_to_instance(tokens, indicator, frame_tags)
示例14: _read
# 需要导入模块: from allennlp.common import file_utils [as 别名]
# 或者: from allennlp.common.file_utils import cached_path [as 别名]
def _read(self, file_path):
    """Read Ontonotes coref data, yielding one instance per document."""
    # If ``file_path`` is a URL, redirect to the cache.
    file_path = cached_path(file_path)
    ontonotes_reader = Ontonotes()
    for sentences in ontonotes_reader.dataset_document_iterator(file_path):
        clusters = collections.defaultdict(list)
        offset = 0
        for sentence in sentences:
            # Coref annotations are sentence-relative; shift each span by the
            # running token offset so it indexes into the whole document.
            for cluster_id, (start, end) in sentence.coref_spans:
                clusters[cluster_id].append((start + offset, end + offset))
            offset += len(sentence.words)
        canonical_clusters = canonicalize_clusters(clusters)
        yield self.text_to_instance([s.words for s in sentences], canonical_clusters)
#overrides
示例15: _read
# 需要导入模块: from allennlp.common import file_utils [as 别名]
# 或者: from allennlp.common.file_utils import cached_path [as 别名]
def _read(self, file_path):
    """Read a plain-text LM corpus, yielding (input, one-token-shifted output) pairs."""
    # If ``file_path`` is a URL, redirect to the cache.
    file_path = cached_path(file_path)
    with open(file_path, u"r") as text_file:
        instance_strings = text_file.readlines()
    if self._tokens_per_instance is not None:
        # Concatenate the whole file and slice it into fixed-length windows;
        # consecutive windows overlap by one token (the shared boundary token).
        all_text = u" ".join([x.replace(u"\n", u" ").strip() for x in instance_strings])
        tokenized_text = self._tokenizer.tokenize(all_text)
        num_tokens = self._tokens_per_instance + 1
        tokenized_strings = []
        logger.info(u"Creating dataset from all text in file: %s", file_path)
        for index in Tqdm.tqdm(range(0, len(tokenized_text) - num_tokens, num_tokens - 1)):
            tokenized_strings.append(tokenized_text[index:(index + num_tokens)])
    else:
        # One instance per line of the file.
        tokenized_strings = [self._tokenizer.tokenize(s) for s in instance_strings]
    for tokenized_string in tokenized_strings:
        # The output sequence is the input sequence shifted left by one token.
        input_field = TextField(tokenized_string[:-1], self._token_indexers)
        output_field = TextField(tokenized_string[1:], self._output_indexer)
        yield Instance({u'input_tokens': input_field,
                        u'output_tokens': output_field})
#overrides