当前位置: 首页>>代码示例>>Python>>正文


Python Example.fromlist方法代码示例

本文整理汇总了 Python 中 torchtext.data.Example.fromlist 方法的典型用法代码示例。如果您正苦于以下问题:Python Example.fromlist 方法的具体用法?Python Example.fromlist 怎么用?Python Example.fromlist 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在的类 torchtext.data.Example 的用法示例。


在下文中一共展示了 Example.fromlist 方法的 10 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: df_to_dialogue_examples

# 需要导入模块: from torchtext.data import Example [as 别名]
# 或者: from torchtext.data.Example import fromlist [as 别名]
def df_to_dialogue_examples(df: pd.DataFrame, *, fields: List[Tuple[str, Field]], batch_col: str,
                            role_col: str, text_col: str, sort_col: str, max_sl=1000) -> Iterator[Example]:
    """Yield one dialogue ``Example`` per multi-role conversation.

    Args:
        df: A DataFrame, or a list of DataFrames (a single frame is wrapped
            in a list before processing).
        fields: ``(name, Field)`` pairs handed to ``Example.fromlist``; the
            ``tokenize`` callable of the first field is used to measure
            utterance lengths.
        batch_col: Column whose values group rows into conversations.
        role_col: Column holding the speaker role of each row.
        text_col: Column holding the utterance text of each row.
        sort_col: Column used to order the rows inside a conversation.
        max_sl: Conversations whose longest utterance has ``max_sl`` tokens
            or more are dropped.

    Yields:
        Examples built from ``[joined text, joined role tokens]``, each with
        an extra ``sl`` attribute listing the token count of every utterance.
    """
    frames = df if isinstance(df, list) else [df]
    tokenize = fields[0][1].tokenize
    total_frames = len(frames)
    for file_index, frame in enumerate(frames):
        grouped = tqdm(frame.groupby(batch_col), desc=f"processed file {file_index}/{total_frames}")
        for _chat_id, conversation in grouped:
            # A conversation with a single distinct role is not a dialogue.
            if not conversation[role_col].nunique() > 1:
                continue
            ordered = conversation.sort_values(by=sort_col)
            role_tokens = "__" + ordered[role_col] + "__"
            utterances = (role_tokens + " " + ordered[text_col]).astype(str)
            utterance_lengths = utterances.apply(lambda utterance: len(tokenize(utterance)))
            # Series.str.cat already returns one joined string.
            joined_text = utterances.str.cat(sep=" ")
            joined_roles = role_tokens.str.cat(sep=" ")
            example = Example.fromlist([joined_text.strip(), joined_roles.strip()], fields)
            example.sl = utterance_lengths.tolist()
            # Sanity check: ignore conversations containing an over-long utterance.
            if max(example.sl) < max_sl:
                yield example
开发者ID:outcastofmusic,项目名称:quick-nlp,代码行数:21,代码来源:datasets.py

示例2: __init__

# 需要导入模块: from torchtext.data import Example [as 别名]
# 或者: from torchtext.data.Example import fromlist [as 别名]
def __init__(self, path, fields, encoding="utf-8", separator=' ||| ', **kwargs):
    """Load ``sentence<separator>actions`` lines from ``path`` into examples.

    Args:
        path: File to read, one instance per line.
        fields: Fields passed to ``Example.fromlist`` and to the parent
            constructor.
        encoding: Text encoding used to open the file.
        separator: String that splits a line into its sentence part and its
            actions part.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    def parse(record):
        # The two-name unpacking raises ValueError unless the line splits
        # into exactly two parts.
        raw_sentence, raw_actions = record.split(separator)
        # strip() removes leading/trailing whitespace (including the newline)
        # before whitespace tokenisation.
        return raw_sentence.strip().split(), raw_actions.strip().split()

    with open(path, "r", encoding=encoding) as handle:
        examples = [Example.fromlist(parse(record), fields) for record in handle]
    super(TransitionDataset, self).__init__(examples, fields, **kwargs)
开发者ID:smilelight,项目名称:lightNLP,代码行数:15,代码来源:dataset.py

示例3: __init__

# 需要导入模块: from torchtext.data import Example [as 别名]
# 或者: from torchtext.data.Example import fromlist [as 别名]
def __init__(self, path, fields, window_size=3, tokenize=default_tokenize, encoding="utf-8", **kwargs):
    """Build ``(context words, centre word)`` examples from a text file.

    Args:
        path: File to read, one sentence per line.
        fields: Fields passed to ``Example.fromlist`` and to the parent
            constructor.
        window_size: Maximum number of words taken on each side of the
            centre word.
        tokenize: Callable turning a stripped line into a sequence of words.
        encoding: Text encoding used to open the file.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    examples = []
    with open(path, "r", encoding=encoding) as handle:
        for raw_line in handle:
            words = tokenize(raw_line.strip())
            # Lines with fewer than window_size + 1 words are skipped.
            if len(words) < window_size + 1:
                continue
            for position, centre in enumerate(words):
                left_context = words[max(0, position - window_size):position]
                # Slicing clamps at the end of the sequence, so no explicit
                # bound against len(words) is needed on the right side.
                right_context = words[position + 1:position + window_size + 1]
                examples.append(Example.fromlist((left_context + right_context, centre), fields))
    super(CBOWDataset, self).__init__(examples, fields, **kwargs)
开发者ID:smilelight,项目名称:lightNLP,代码行数:14,代码来源:dataset.py

示例4: __init__

# 需要导入模块: from torchtext.data import Example [as 别名]
# 或者: from torchtext.data.Example import fromlist [as 别名]
def __init__(self, path, fields, window_size=3, tokenize=default_tokenize, encoding="utf-8", **kwargs):
    """Build ``(context word, centre word)`` pair examples from a text file.

    Every word within ``window_size`` positions of a centre word produces
    its own example.

    Args:
        path: File to read, one sentence per line.
        fields: Fields passed to ``Example.fromlist`` and to the parent
            constructor.
        window_size: Maximum number of words taken on each side of the
            centre word.
        tokenize: Callable turning a stripped line into a sequence of words.
        encoding: Text encoding used to open the file.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    examples = []
    with open(path, "r", encoding=encoding) as handle:
        for raw_line in handle:
            words = tokenize(raw_line.strip())
            # Lines with fewer than window_size + 1 words are skipped.
            if len(words) < window_size + 1:
                continue
            for position, centre in enumerate(words):
                left_context = words[max(0, position - window_size):position]
                # Slicing clamps at the end of the sequence on the right side.
                right_context = words[position + 1:position + window_size + 1]
                examples.extend(
                    Example.fromlist((neighbour, centre), fields)
                    for neighbour in left_context + right_context
                )
    super(SkipGramDataset, self).__init__(examples, fields, **kwargs)
开发者ID:smilelight,项目名称:lightNLP,代码行数:15,代码来源:dataset.py

示例5: __init__

# 需要导入模块: from torchtext.data import Example [as 别名]
# 或者: from torchtext.data.Example import fromlist [as 别名]
def __init__(self, path, fields, encoding="utf-8", **kwargs):
    """Load relation-extraction examples from a whitespace-separated file.

    Each line must split into exactly four whitespace-separated chunks:
    ``entity_1 entity_2 relation sentence``; any other count makes the
    unpacking raise ``ValueError``.

    Args:
        path: File to read, one instance per line.
        fields: Fields passed to ``Example.fromlist`` and to the parent
            constructor.
        encoding: Text encoding used to open the file.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    def to_example(record):
        entity_1, entity_2, relation, sentence = record.split()
        # handle_line is defined elsewhere; presumably it tokenises the
        # sentence around the two entities -- confirm against its definition.
        sentence_tokens = handle_line(entity_1, entity_2, sentence)
        return Example.fromlist((sentence_tokens, relation), fields)

    with open(path, "r", encoding=encoding) as handle:
        examples = [to_example(record) for record in handle]
    super(REDataset, self).__init__(examples, fields, **kwargs)
开发者ID:smilelight,项目名称:lightNLP,代码行数:12,代码来源:dataset.py

示例6: make_example

# 需要导入模块: from torchtext.data import Example [as 别名]
# 或者: from torchtext.data.Example import fromlist [as 别名]
def make_example(oracle: Oracle, fields: List[Tuple[str, Field]]):
    """Build an ``Example`` from an oracle's actions, non-terminals, POS tags and words.

    Args:
        oracle: Object exposing ``actions``, ``pos_tags`` and ``words``.
        fields: ``(name, Field)`` pairs, matched positionally against
            ``[actions, nonterms, pos_tags, words]``.

    Returns:
        The result of ``Example.fromlist`` for those four columns.
    """
    nonterminals = []
    for action in oracle.actions:
        # Only actions accepted by is_nt contribute a non-terminal.
        if is_nt(action):
            nonterminals.append(get_nonterm(action))
    columns = [oracle.actions, nonterminals, oracle.pos_tags, oracle.words]
    return Example.fromlist(columns, fields)
开发者ID:kmkurn,项目名称:pytorch-rnng,代码行数:7,代码来源:example.py

示例7: get_examples_from_file

# 需要导入模块: from torchtext.data import Example [as 别名]
# 或者: from torchtext.data.Example import fromlist [as 别名]
def get_examples_from_file(self, path: str, fields: List[NamedField], format: str, encoding: str = 'utf-8',
                           skip_header: bool = True) -> Tuple[List[Example], List[NamedField]]:
    """Read a csv/tsv/json file and turn every row into an ``Example``.

    Args:
        path: File to read; a leading ``~`` is expanded.
        fields: Fields matched positionally against each row's values.
        format: One of ``"csv"``, ``"tsv"`` or ``"json"`` (case-insensitive).
        encoding: Text encoding passed to the pandas reader.
        skip_header: For csv/tsv, treat the first row as a header
            (``header=0``) when true, otherwise read it as data
            (``header=None``). Not used for json.

    Returns:
        A tuple ``(examples, fields)`` where ``fields`` is the argument
        passed in, unchanged.

    Raises:
        ValueError: If ``format`` is not a supported format. Previously this
            case failed later with an ``UnboundLocalError`` because no data
            had been loaded.
    """
    file_format = format.lower()
    if file_format in ("csv", "tsv"):
        sep = "," if file_format == "csv" else "\t"
        data = pd.read_csv(os.path.expanduser(path), encoding=encoding, header=0 if skip_header else None,
                           sep=sep)
    elif file_format == "json":
        data = pd.read_json(os.path.expanduser(path), encoding=encoding)
    else:
        # Fail fast with a clear message, before any file access.
        raise ValueError(f"Unsupported format {format!r}; expected 'csv', 'tsv' or 'json'")
    examples = [Example.fromlist(row.values.tolist(), fields) for _, row in data.iterrows()]
    return examples, fields
开发者ID:outcastofmusic,项目名称:quick-nlp,代码行数:14,代码来源:datasets.py

示例8: __init__

# 需要导入模块: from torchtext.data import Example [as 别名]
# 或者: from torchtext.data.Example import fromlist [as 别名]
def __init__(self, df, fields, **kwargs):
    """Create the dataset from the rows of a DataFrame.

    Args:
        df: Source DataFrame; only the columns returned by
            ``self.columns(fields)`` are kept, in that order.
        fields: Fields matched positionally against each row's values and
            passed on to the parent constructor.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    # NOTE(review): self.columns is defined outside this snippet; presumably
    # it returns the column labels corresponding to `fields` -- confirm.
    selected = df.loc[:, self.columns(fields)]
    examples = [Example.fromlist(row.tolist(), fields) for _, row in selected.iterrows()]
    super().__init__(examples, fields, **kwargs)
开发者ID:outcastofmusic,项目名称:quick-nlp,代码行数:10,代码来源:datasets.py

示例9: json_to_dialogue_examples

# 需要导入模块: from torchtext.data import Example [as 别名]
# 或者: from torchtext.data.Example import fromlist [as 别名]
def json_to_dialogue_examples(path_dir: Path, *, fields: List[Tuple[str, Field]], utterance_key: str, role_key: str,
                              text_key: str, sort_key: str, max_sl: int = 1000,
                              target_roles: Optional[List[str]] = None) -> \
        Iterator[Example]:
    """Yield dialogue examples from every ``*.json`` file in ``path_dir``.

    Each file is expected to hold a list of dialogues, and each dialogue a
    list of utterance dicts under ``utterance_key``, for example::

        [{utterance_key: [{text_key: message, role_key: role, sort_key: timestamp}]}]

    For every utterance after the first one in a dialogue, an example is
    built from ``[history text, history roles, current utterance]``.

    Args:
        path_dir: Directory searched (non-recursively) for ``*.json`` files.
        fields: ``(name, Field)`` pairs handed to ``Example.fromlist``; the
            first field's ``tokenize`` measures utterance lengths.
        utterance_key: Key of the utterance list inside a dialogue.
        role_key: Key of the speaker role inside an utterance.
        text_key: Key of the text inside an utterance; a list value is
            joined with spaces.
        sort_key: Key name, or a callable, used to order the utterances.
        max_sl: Examples whose longest history utterance has ``max_sl``
            tokens or more are dropped.
        target_roles: When given, only utterances spoken by one of these
            roles produce an example.

    Yields:
        Examples carrying an extra ``sl`` attribute: the token count of each
        utterance in the history.

    Raises:
        ValueError: If ``sort_key`` is neither a string nor callable; it is
            raised while processing the first dialogue.
    """
    for json_file in path_dir.glob("*.json"):
        with json_file.open('r', encoding='utf-8') as fh:
            dialogues = json.load(fh)
        for dialogue in tqdm(dialogues, desc=f'processed file {json_file}'):
            if isinstance(sort_key, str):
                order_by = itemgetter(sort_key)
            elif callable(sort_key):
                order_by = sort_key
            else:
                raise ValueError("Invalid sort_key provided")
            conversation = sorted(dialogue[utterance_key], key=order_by)
            tokenize = fields[0][1].tokenize
            # Running history of the dialogue seen so far.
            history_text, history_roles, lengths = "", "", []
            for utterance in conversation:
                message = utterance[text_key]
                if isinstance(message, list):
                    message = " ".join(message)
                role_token = "__" + utterance[role_key] + "__"
                current = role_token + " " + message
                # The first utterance has no history, so it yields nothing.
                has_history = history_text.strip() != ""
                if has_history and (target_roles is None or utterance[role_key] in target_roles):
                    example = Example.fromlist([history_text.strip(), history_roles.strip(), current], fields)
                    example.sl = list(lengths)
                    # One length entry is expected per role token in the history.
                    assert len(lengths) == len(history_roles.split())
                    # Sanity check: ignore examples containing an over-long utterance.
                    if max(example.sl) < max_sl:
                        yield example
                history_text += " " + current
                history_roles += " " + role_token
                lengths.append(len(tokenize(current)))
开发者ID:outcastofmusic,项目名称:quick-nlp,代码行数:42,代码来源:datasets.py

示例10: predict

# 需要导入模块: from torchtext.data import Example [as 别名]
# 或者: from torchtext.data.Example import fromlist [as 别名]
def predict(self, examples, batch_size=1):
    """Run the model on raw examples and return its predictions.

    Args:
        examples: A dict mapping field names to lists of raw examples
            (strings). The lists are zipped together, one entry per example.
        batch_size: Batch size passed to ``self.run``. Default 1.

    Returns:
        An empty ``defaultdict(list)`` when ``examples`` is empty; otherwise
        whatever ``self.run`` returns for the constructed dataset. The
        original documentation describes this as a dict mapping prediction
        levels (word, sentence ..) to the model predictions per example.

    Raises:
        Exception: If ``self.fields`` is ``None``, or if any ``source`` or
            ``target`` entry is empty after stripping whitespace.
        KeyError: If the ``source`` or ``target`` field is missing or empty.

    Example:
        >>> import kiwi
        >>> predictor = kiwi.load_model('tests/toy-data/models/nuqe.torch')
        >>> src = ['a b c', 'd e f g']
        >>> tgt = ['q w e r', 't y']
        >>> align = ['0-0 1-1 1-2', '1-1 3-0']
        >>> examples = {kiwi.constants.SOURCE: src,
        ...             kiwi.constants.TARGET: tgt,
        ...             kiwi.constants.ALIGNMENTS: align}
        >>> predictor.predict(examples)
        {'tags': [[0.4760947525501251,
           0.47569847106933594,
           0.4948718547821045,
           0.5305878520011902],
          [0.5105430483818054, 0.5252899527549744]]}
    """
    if not examples:
        return defaultdict(list)
    if self.fields is None:
        raise Exception('Missing fields object.')

    # Source is checked before target.
    for required in (const.SOURCE, const.TARGET):
        if not examples.get(required):
            raise KeyError('Missing required field "{}"'.format(required))

    # Strip every entry first (a full list, not a lazy generator) so each
    # value is visited even when an earlier one is empty.
    stripped = [s.strip() for s in examples[const.SOURCE] + examples[const.TARGET]]
    if not all(stripped):
        raise Exception(
            'Empty String in {} or {} field found!'.format(
                const.SOURCE, const.TARGET
            )
        )

    named_fields = [(name, self.fields[name]) for name in examples]
    # Transpose the per-field columns into per-example rows.
    rows = zip(*examples.values())
    field_examples = [Example.fromlist(row, named_fields) for row in rows]
    dataset = QEDataset(field_examples, fields=named_fields)
    return self.run(dataset, batch_size)
开发者ID:Unbabel,项目名称:OpenKiwi,代码行数:63,代码来源:predictor.py


注:本文中的torchtext.data.Example.fromlist方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。