本文整理汇总了 Python 中 torchtext.data.Example.fromlist 方法的典型用法代码示例。如果您正苦于以下问题：Python Example.fromlist 方法的具体用法？Python Example.fromlist 怎么用？Python Example.fromlist 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 torchtext.data.Example 的用法示例。
在下文中一共展示了 Example.fromlist 方法的 10 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: df_to_dialogue_examples
# 需要导入模块: from torchtext.data import Example [as 别名]
# 注意: fromlist 是 Example 的类方法，无法单独导入，应通过 Example.fromlist(...) 调用
def df_to_dialogue_examples(df: pd.DataFrame, *, fields: List[Tuple[str, Field]], batch_col: str,
                            role_col: str, text_col: str, sort_col: str, max_sl=1000) -> Iterator[Example]:
    """Yield one ``Example`` per multi-role conversation found in the given frame(s).

    ``df`` may be a single DataFrame or a list of them. Rows are grouped by
    ``batch_col``; groups whose ``role_col`` holds at most one distinct value
    are skipped. Each remaining group is ordered by ``sort_col`` and every
    utterance is prefixed with a ``__<role>__`` marker. The example is built
    from the space-joined marked utterances and the space-joined markers, and
    its ``sl`` attribute holds the per-utterance token counts computed with the
    tokenizer of the first field. Examples whose longest utterance reaches
    ``max_sl`` tokens are not yielded.
    """
    df = df if isinstance(df, list) else [df]
    tokenize = fields[0][1].tokenize

    def count_tokens(utterance):
        return len(tokenize(utterance))

    for file_index, _df in enumerate(df):
        progress = tqdm(_df.groupby(batch_col), desc=f"processed file {file_index}/{len(df)}")
        for _, group in progress:
            # A dialogue needs at least two distinct speakers
            if not group[role_col].nunique() > 1:
                continue
            ordered = group.sort_values(by=sort_col)
            role_markers = "__" + ordered[role_col] + "__"
            marked_utterances = (role_markers + " " + ordered[text_col]).astype(str)
            token_counts = marked_utterances.apply(count_tokens)
            text = marked_utterances.str.cat(sep=" ")
            roles = role_markers.str.cat(sep=" ")
            example = Example.fromlist([text.strip(), roles.strip()], fields)
            example.sl = token_counts.tolist()
            # Sanity check: ignore conversations with an unexpectedly long utterance
            if max(example.sl) < max_sl:
                yield example
示例2: __init__
# 需要导入模块: from torchtext.data import Example [as 别名]
# 注意: fromlist 是 Example 的类方法，无法单独导入，应通过 Example.fromlist(...) 调用
def __init__(self, path, fields, encoding="utf-8", separator=' ||| ', **kwargs):
    """Build the dataset from a file holding one ``sentence<separator>actions`` pair per line.

    Both halves of each line are stripped and split on whitespace before
    being passed to ``Example.fromlist``.

    Args:
        path: File to read.
        fields: Fields handed to ``Example.fromlist`` and to the parent constructor.
        encoding: Text encoding used to open ``path``.
        separator: String standing between the sentence and its actions.
        **kwargs: Forwarded unchanged to the parent constructor.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            parts around ``separator``.
    """
    examples = []
    with open(path, "r", encoding=encoding) as f:
        for line_no, inst in enumerate(f, start=1):
            # Blank lines (e.g. a trailing newline at end of file) carry no instance
            if not inst.strip():
                continue
            parts = inst.split(separator)
            if len(parts) != 2:
                raise ValueError(
                    "{}:{}: expected exactly one {!r} separator, found {}".format(
                        path, line_no, separator, len(parts) - 1
                    )
                )
            sentence, actions = parts
            # Make sure there is no leading/trailing whitespace
            sentence = sentence.strip().split()
            actions = actions.strip().split()
            examples.append(Example.fromlist((sentence, actions), fields))
    super(TransitionDataset, self).__init__(examples, fields, **kwargs)
示例3: __init__
# 需要导入模块: from torchtext.data import Example [as 别名]
# 注意: fromlist 是 Example 的类方法，无法单独导入，应通过 Example.fromlist(...) 调用
def __init__(self, path, fields, window_size=3, tokenize=default_tokenize, encoding="utf-8", **kwargs):
    """Create one (context, target) example for every token of every usable line.

    Each line of ``path`` is stripped and tokenized. Lines with fewer than
    ``window_size + 1`` tokens are skipped. For every token, the context is
    the concatenation of up to ``window_size`` tokens before it and up to
    ``window_size`` tokens after it; the token itself is the target.
    Remaining keyword arguments go to the parent constructor.
    """
    examples = []
    with open(path, "r", encoding=encoding) as f:
        for line in f:
            words = tokenize(line.strip())
            if len(words) <= window_size:
                continue
            for position, target in enumerate(words):
                # Slice ends beyond the sequence are clamped by Python; only the
                # left start needs an explicit floor to avoid negative indices.
                before = words[max(0, position - window_size):position]
                after = words[position + 1:position + window_size + 1]
                examples.append(Example.fromlist((before + after, target), fields))
    super(CBOWDataset, self).__init__(examples, fields, **kwargs)
示例4: __init__
# 需要导入模块: from torchtext.data import Example [as 别名]
# 注意: fromlist 是 Example 的类方法，无法单独导入，应通过 Example.fromlist(...) 调用
def __init__(self, path, fields, window_size=3, tokenize=default_tokenize, encoding="utf-8", **kwargs):
    """Create one (context word, target word) example per neighbouring pair.

    Each line of ``path`` is stripped and tokenized. Lines with fewer than
    ``window_size + 1`` tokens are skipped. For every token, each of the up to
    ``window_size`` tokens before it and up to ``window_size`` tokens after it
    is paired with that token as a separate example. Remaining keyword
    arguments go to the parent constructor.
    """
    examples = []
    with open(path, "r", encoding=encoding) as f:
        for line in f:
            words = tokenize(line.strip())
            if len(words) <= window_size:
                continue
            for position, target in enumerate(words):
                # Slice ends beyond the sequence are clamped by Python; only the
                # left start needs an explicit floor to avoid negative indices.
                before = words[max(0, position - window_size):position]
                after = words[position + 1:position + window_size + 1]
                examples.extend(
                    Example.fromlist((neighbour, target), fields)
                    for neighbour in before + after
                )
    super(SkipGramDataset, self).__init__(examples, fields, **kwargs)
示例5: __init__
# 需要导入模块: from torchtext.data import Example [as 别名]
# 注意: fromlist 是 Example 的类方法，无法单独导入，应通过 Example.fromlist(...) 调用
def __init__(self, path, fields, encoding="utf-8", **kwargs):
    """Build the dataset from a whitespace-separated file with four columns per line.

    Every non-blank line must split on whitespace into exactly
    ``entity_1 entity_2 relation sentence``. The two entities and the
    sentence are passed to ``handle_line``; its result and the relation form
    the example.

    Args:
        path: File to read.
        fields: Fields handed to ``Example.fromlist`` and to the parent constructor.
        encoding: Text encoding used to open ``path``.
        **kwargs: Forwarded unchanged to the parent constructor.

    Raises:
        ValueError: If a non-blank line does not contain exactly four
            whitespace-separated columns.
    """
    examples = []
    with open(path, "r", encoding=encoding) as f:
        for line_no, line in enumerate(f, start=1):
            chunks = line.split()
            # Blank lines (e.g. a trailing newline at end of file) carry no instance
            if not chunks:
                continue
            if len(chunks) != 4:
                raise ValueError(
                    "{}:{}: expected 4 whitespace-separated columns, found {}".format(
                        path, line_no, len(chunks)
                    )
                )
            entity_1, entity_2, relation, sentence = chunks
            sentence_list = handle_line(entity_1, entity_2, sentence)
            examples.append(Example.fromlist((sentence_list, relation), fields))
    super(REDataset, self).__init__(examples, fields, **kwargs)
示例6: make_example
# 需要导入模块: from torchtext.data import Example [as 别名]
# 注意: fromlist 是 Example 的类方法，无法单独导入，应通过 Example.fromlist(...) 调用
def make_example(oracle: Oracle, fields: List[Tuple[str, Field]]):
    """Turn an oracle into an ``Example`` with four columns.

    The columns are, in order: the oracle's actions, the ``get_nonterm``
    value of every action for which ``is_nt`` is true, the oracle's POS tags
    and the oracle's words.
    """
    nonterms = []
    for action in oracle.actions:
        if is_nt(action):
            nonterms.append(get_nonterm(action))
    columns = [oracle.actions, nonterms, oracle.pos_tags, oracle.words]
    return Example.fromlist(columns, fields)
示例7: get_examples_from_file
# 需要导入模块: from torchtext.data import Example [as 别名]
# 注意: fromlist 是 Example 的类方法，无法单独导入，应通过 Example.fromlist(...) 调用
def get_examples_from_file(self, path: str, fields: List[NamedField], format: str, encoding: str = 'utf-8',
                           skip_header: bool = True) -> Tuple[List[Example], List[NamedField]]:
    """Read a csv, tsv or json file with pandas and turn every row into an ``Example``.

    Args:
        path: File to read; a leading ``~`` is expanded.
        fields: Fields handed to ``Example.fromlist`` for every row.
        format: One of ``"csv"``, ``"tsv"`` or ``"json"`` (case-insensitive).
        encoding: Text encoding passed to the pandas reader.
        skip_header: For csv/tsv, treat the first line as a header row
            (``header=0``) instead of data (``header=None``).

    Returns:
        The list of examples together with ``fields`` as given.

    Raises:
        ValueError: If ``format`` is not one of the supported formats.
    """
    file_format = format.lower()
    if file_format in ("csv", "tsv"):
        sep = "," if file_format == "csv" else "\t"
        data = pd.read_csv(os.path.expanduser(path), encoding=encoding, header=0 if skip_header else None,
                           sep=sep)
    elif file_format == "json":
        data = pd.read_json(os.path.expanduser(path), encoding=encoding)
    else:
        # Without this branch `data` would be unbound and fail later with an
        # unrelated-looking UnboundLocalError.
        raise ValueError(
            "Unsupported format {!r}; expected 'csv', 'tsv' or 'json'".format(format)
        )
    examples = [Example.fromlist(row.values.tolist(), fields) for _, row in data.iterrows()]
    return examples, fields
示例8: __init__
# 需要导入模块: from torchtext.data import Example [as 别名]
# 注意: fromlist 是 Example 的类方法，无法单独导入，应通过 Example.fromlist(...) 调用
def __init__(self, df, fields, **kwargs):
    """Build the dataset from the rows of a DataFrame.

    Only the columns returned by ``self.columns(fields)`` are kept; each row
    of the resulting frame becomes one ``Example``. Remaining keyword
    arguments go to the parent constructor.
    """
    selected = df.loc[:, self.columns(fields)]
    examples = [Example.fromlist(row.tolist(), fields) for _, row in selected.iterrows()]
    super().__init__(examples, fields, **kwargs)
示例9: json_to_dialogue_examples
# 需要导入模块: from torchtext.data import Example [as 别名]
# 注意: fromlist 是 Example 的类方法，无法单独导入，应通过 Example.fromlist(...) 调用
def json_to_dialogue_examples(path_dir: Path, *, fields: List[Tuple[str, Field]], utterance_key: str, role_key: str,
                              text_key: str, sort_key: str, max_sl: int = 1000,
                              target_roles: Optional[List[str]] = None) -> \
        Iterator[Example]:
    """Load dialogues from the ``*.json`` files in ``path_dir`` and yield examples.

    A json file should have a List of Dicts, see examples:
    [{batch_col:chat_id, utterance_col:[{text_col:message, role_col:role, sort_col:timestamp}]}]

    The utterances of each dialogue are sorted with ``sort_key`` and walked in
    order. For every utterance that has some preceding text, and whose role is
    in ``target_roles`` (or any role when ``target_roles`` is None), an example
    is built from: the preceding text, the preceding ``__<role>__`` markers,
    and the current utterance prefixed with its own marker. ``example.sl``
    holds the token counts of the preceding utterances, computed with the
    tokenizer of the first field. Examples whose longest preceding utterance
    reaches ``max_sl`` tokens are not yielded.

    Args:
        path_dir: Directory searched (non-recursively) for ``*.json`` files.
        fields: Fields handed to ``Example.fromlist``.
        utterance_key: Key of the utterance list inside a dialogue dict.
        role_key: Key of the speaker role inside an utterance dict.
        text_key: Key of the text inside an utterance dict; a list value is
            joined with spaces.
        sort_key: Either a key name looked up in every utterance, or a
            callable used directly as the sort key.
        max_sl: Exclusive upper bound on the longest preceding utterance.
        target_roles: Roles for which examples are produced; None means all.

    Raises:
        ValueError: If ``sort_key`` is neither a string nor a callable. As
            this is a generator, the error surfaces on first iteration.
    """
    # Resolve loop-invariant helpers once, so a bad sort_key fails before any
    # file is read instead of once per dialogue.
    if isinstance(sort_key, str):
        key = itemgetter(sort_key)
    elif callable(sort_key):
        key = sort_key
    else:
        raise ValueError("Invalid sort_key provided")
    tokenize = fields[0][1].tokenize

    for file in path_dir.glob("*.json"):
        with file.open('r', encoding='utf-8') as fh:
            dialogues = json.load(fh)
        for dialogue in tqdm(dialogues, desc=f'processed file {file}'):
            conversation = sorted(dialogue[utterance_key], key=key)
            # History accumulated over the utterances seen so far
            text = ""
            roles = ""
            lengths = []
            for utterance in conversation:
                ut = utterance[text_key]
                ut = " ".join(ut) if isinstance(ut, list) else ut
                conv_role = "__" + utterance[role_key] + "__"
                text_with_role = conv_role + " " + ut
                # The first utterance has no history, so it only seeds the context
                if text.strip() != "":
                    if target_roles is None or utterance[role_key] in target_roles:
                        example = Example.fromlist([text.strip(), roles.strip(), text_with_role], fields)
                        # Copy: `lengths` keeps growing after this example is yielded
                        example.sl = list(lengths)
                        # One length per role marker; fails if a role contains whitespace
                        assert len(lengths) == len(roles.split())
                        # Sanity check: ignore histories with an unexpectedly long utterance
                        if max(example.sl) < max_sl:
                            yield example
                text += " " + text_with_role
                roles += " " + conv_role
                lengths.append(len(tokenize(text_with_role)))
示例10: predict
# 需要导入模块: from torchtext.data import Example [as 别名]
# 注意: fromlist 是 Example 的类方法，无法单独导入，应通过 Example.fromlist(...) 调用
def predict(self, examples, batch_size=1):
    """Run the model on raw examples and return its predictions.

    Args:
        examples: A dict mapping field names to the
            list of raw examples (strings).
        batch_size: Batch size passed on to ``self.run``. Default 1.

    Returns:
        The result of ``self.run`` on the dataset built from ``examples``;
        an empty ``defaultdict(list)`` when ``examples`` is empty.

    Raises:
        KeyError: If the `source` or `target` entry is missing or empty.
        Exception: If ``self.fields`` is None, or if an example has an
            empty string as `source` or `target` field.

    Example:
        >>> import kiwi
        >>> predictor = kiwi.load_model('tests/toy-data/models/nuqe.torch')
        >>> src = ['a b c', 'd e f g']
        >>> tgt = ['q w e r', 't y']
        >>> align = ['0-0 1-1 1-2', '1-1 3-0']
        >>> examples = {kiwi.constants.SOURCE: src,
                        kiwi.constants.TARGET: tgt,
                        kiwi.constants.ALIGNMENTS: align}
        >>> predictor.predict(examples)
        {'tags': [[0.4760947525501251,
                   0.47569847106933594,
                   0.4948718547821045,
                   0.5305878520011902],
                  [0.5105430483818054, 0.5252899527549744]]}
    """
    if not examples:
        return defaultdict(list)
    if self.fields is None:
        raise Exception('Missing fields object.')

    if not examples.get(const.SOURCE):
        raise KeyError('Missing required field "{}"'.format(const.SOURCE))
    if not examples.get(const.TARGET):
        raise KeyError('Missing required field "{}"'.format(const.TARGET))

    # Whitespace-only sentences count as empty
    sentences = examples[const.SOURCE] + examples[const.TARGET]
    stripped = [sentence.strip() for sentence in sentences]
    if not all(stripped):
        raise Exception(
            'Empty String in {} or {} field found!'.format(
                const.SOURCE, const.TARGET
            )
        )

    fields = []
    for name in examples:
        fields.append((name, self.fields[name]))

    # Transpose the per-field columns into one tuple of values per example
    field_examples = []
    for values in zip(*examples.values()):
        field_examples.append(Example.fromlist(values, fields))

    dataset = QEDataset(field_examples, fields=fields)
    return self.run(dataset, batch_size)