当前位置: 首页>>代码示例>>Python>>正文


Python BertForSequenceClassification.from_pretrained方法代码示例

本文整理汇总了Python中pytorch_pretrained_bert.modeling.BertForSequenceClassification.from_pretrained方法的典型用法代码示例。如果您正苦于以下问题:Python BertForSequenceClassification.from_pretrained方法的具体用法?Python BertForSequenceClassification.from_pretrained怎么用?Python BertForSequenceClassification.from_pretrained使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pytorch_pretrained_bert.modeling.BertForSequenceClassification的用法示例。


在下文中一共展示了BertForSequenceClassification.from_pretrained方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from pytorch_pretrained_bert.modeling import BertForSequenceClassification [as 别名]
# 或者: from pytorch_pretrained_bert.modeling.BertForSequenceClassification import from_pretrained [as 别名]
def __init__(self, language=Language.ENGLISH, num_labels=2, cache_dir="."):
        """Set up the sequence classifier around a pretrained BERT model.

        Args:
            language (Language, optional): Identifier of the pretrained model,
                forwarded unchanged to ``from_pretrained``.
                Defaults to Language.ENGLISH.
            num_labels (int, optional): Number of distinct labels the
                classification head must predict. Defaults to 2.
            cache_dir (str, optional): Directory handed to ``from_pretrained``
                as its cache location. Defaults to ".".

        Raises:
            ValueError: If ``num_labels`` is smaller than 2.
        """
        # Reject degenerate label counts before any attribute is set or any
        # model is loaded.
        if num_labels < 2:
            raise ValueError("Number of labels should be at least 2.")

        self.language, self.num_labels, self.cache_dir = (
            language,
            num_labels,
            cache_dir,
        )

        # Build the classifier from the pretrained checkpoint.
        pretrained = BertForSequenceClassification.from_pretrained(
            language, cache_dir=cache_dir, num_labels=num_labels
        )
        self.model = pretrained

        # NOTE(review): ``self.cuda`` is not defined in this snippet; presumably
        # a property on the enclosing class reporting whether the model lives
        # on a CUDA device - confirm against the class definition.
        self.has_cuda = self.cuda
开发者ID:interpretml,项目名称:interpret-text,代码行数:25,代码来源:utils_bert.py

示例2: __init__

# 需要导入模块: from pytorch_pretrained_bert.modeling import BertForSequenceClassification [as 别名]
# 或者: from pytorch_pretrained_bert.modeling.BertForSequenceClassification import from_pretrained [as 别名]
def __init__(self, archive_file, model_file=None, use_cuda=False):
        """Unpack the predictor checkpoint if needed and load the classifier.

        Args:
            archive_file: Path to a local zip archive. When it is not an
                existing file, ``model_file`` is resolved through
                ``cached_path`` and used instead.
            model_file: Fallback location passed to ``cached_path`` when
                ``archive_file`` does not exist. Defaults to None.
            use_cuda: When True the model is moved to ``'cuda'``, otherwise it
                stays on ``'cpu'``. Defaults to False.

        Raises:
            Exception: If ``archive_file`` is not a file and ``model_file`` is
                empty.
        """
        if not os.path.isfile(archive_file):
            if not model_file:
                raise Exception("No model for DA-predictor is specified!")
            archive_file = cached_path(model_file)

        # Checkpoints are extracted next to this source file.
        model_dir = os.path.dirname(os.path.abspath(__file__))
        if not os.path.exists(os.path.join(model_dir, 'checkpoints')):
            # Context manager guarantees the archive handle is closed even if
            # extraction fails.
            with zipfile.ZipFile(archive_file, 'r') as archive:
                archive.extractall(model_dir)

        load_dir = os.path.join(model_dir, "checkpoints/predictor/save_step_15120")
        if not os.path.exists(load_dir):
            # The saved step is itself shipped as a nested zip beside load_dir.
            with zipfile.ZipFile(f'{load_dir}.zip', 'r') as archive:
                archive.extractall(os.path.dirname(load_dir))

        # NOTE(review): an uncased vocabulary is combined with
        # do_lower_case=False; kept as-is because changing it would alter
        # tokenization - confirm this matches how the checkpoint was trained.
        self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=False)
        self.max_seq_length = 256
        self.domain = 'restaurant'
        self.model = BertForSequenceClassification.from_pretrained(load_dir,
            cache_dir=os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed_{}'.format(-1)), num_labels=44)
        self.device = 'cuda' if use_cuda else 'cpu'
        self.model.to(self.device)
开发者ID:ConvLab,项目名称:ConvLab,代码行数:24,代码来源:predictor.py

示例3: save_model

# 需要导入模块: from pytorch_pretrained_bert.modeling import BertForSequenceClassification [as 别名]
# 或者: from pytorch_pretrained_bert.modeling.BertForSequenceClassification import from_pretrained [as 别名]
def save_model(self):
        """Persist the trained model's weights and configuration.

        Creates the ``outputs`` directory if it is missing, then writes the
        model's ``state_dict`` to ``outputs/bert-large-uncased`` and its config
        as JSON to ``outputs/bert_config.json``.

        TODO: the output file name is tied to the English model; support for
        other languages still needs to be added.
        """
        # The outputs directory is where the saved artifacts are captured.
        output_dir = "outputs"
        os.makedirs(output_dir, exist_ok=True)

        # File names chosen by the original authors with the stated intent of
        # reloading through `from_pretrained`.
        weights_path = "outputs/bert-large-uncased"
        config_path = "outputs/bert_config.json"

        # A wrapped model exposes the real network as `.module`; fall back to
        # the model itself when there is no wrapper.
        unwrapped = getattr(self.model, "module", self.model)

        torch.save(unwrapped.state_dict(), weights_path)
        unwrapped.config.to_json_file(config_path)
开发者ID:microsoft,项目名称:nlp-recipes,代码行数:24,代码来源:sequence_classification_distributed.py

示例4: test

# 需要导入模块: from pytorch_pretrained_bert.modeling import BertForSequenceClassification [as 别名]
# 或者: from pytorch_pretrained_bert.modeling.BertForSequenceClassification import from_pretrained [as 别名]
def test(args):
    """Evaluate a fine-tuned model on the test split and dump its predictions.

    Loads the pickled model from ``<args.output_dir>/model.pt``, runs it over
    the examples returned by ``AscProcessor.get_test_examples`` and writes the
    collected logits and label ids to ``<args.output_dir>/predictions.json``.

    Inference runs on CUDA when it is available and falls back to CPU
    otherwise, so the function also works on hosts without a GPU.

    Args:
        args: Namespace providing ``bert_model``, ``data_dir``,
            ``max_seq_length``, ``eval_batch_size`` and ``output_dir``.
    """
    processor = data_utils.AscProcessor()
    label_list = processor.get_labels()
    tokenizer = BertTokenizer.from_pretrained(modelconfig.MODEL_ARCHIVE_MAP[args.bert_model])
    eval_examples = processor.get_test_examples(args.data_dir)
    eval_features = data_utils.convert_examples_to_features(
        eval_examples, label_list, args.max_seq_length, tokenizer, "asc"
    )

    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(eval_examples))
    logger.info("  Batch size = %d", args.eval_batch_size)
    all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
    all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
    eval_data = TensorDataset(all_input_ids, all_segment_ids, all_input_mask, all_label_ids)
    # Sequential sampling keeps predictions aligned with the example order.
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)

    # Pick the device at runtime instead of hard-coding CUDA; map_location
    # lets a checkpoint saved on GPU be restored on a CPU-only machine.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # NOTE(review): torch.load unpickles arbitrary objects - only load
    # model.pt files produced by a trusted training run.
    model = torch.load(os.path.join(args.output_dir, "model.pt"), map_location=device)
    model.to(device)
    model.eval()

    full_logits = []
    full_label_ids = []
    for batch in eval_dataloader:
        input_ids, segment_ids, input_mask, label_ids = tuple(t.to(device) for t in batch)

        with torch.no_grad():
            logits = model(input_ids, segment_ids, input_mask)

        # Convert to plain Python lists so the results are JSON-serializable.
        full_logits.extend(logits.detach().cpu().tolist())
        full_label_ids.extend(label_ids.cpu().tolist())

    output_eval_json = os.path.join(args.output_dir, "predictions.json")
    with open(output_eval_json, "w") as fw:
        json.dump({"logits": full_logits, "label_ids": full_label_ids}, fw)
开发者ID:howardhsu,项目名称:BERT-for-RRC-ABSA,代码行数:43,代码来源:run_asc.py

示例5: main

# 需要导入模块: from pytorch_pretrained_bert.modeling import BertForSequenceClassification [as 别名]
# 或者: from pytorch_pretrained_bert.modeling.BertForSequenceClassification import from_pretrained [as 别名]
def main():
    """Train (or resume) a BERT sequence classifier and write test predictions.

    Reads its configuration from the module-level ``args`` object (not defined
    in this block). Seeds the RNGs, builds the model, optionally restores a
    checkpoint, trains unless resuming, then writes one ``id<TAB>prediction``
    line per test example to ``<args.save_dir>/MG1833039.txt``.
    """
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False

    logging.basicConfig(level=logging.INFO)

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    logging.info("Initializing model...")
    model = BertForSequenceClassification.from_pretrained(args.bert_model,
                cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(-1),
                num_labels=2)

    if args.resume:
        # Load onto CPU first so a checkpoint saved on GPU can also be resumed
        # on a CPU-only run; the model is moved to the GPU below when enabled.
        model.load_state_dict(torch.load(args.load_model, map_location="cpu"))

    if use_gpu:
        model = model.cuda()

    params = sum(np.prod(p.size()) for p in model.parameters())
    logging.info("Number of parameters: {}".format(params))

    # makedirs also creates missing parent directories and tolerates an
    # already existing directory.
    os.makedirs(args.save_dir, exist_ok=True)

    train_dataset = BertDataset(args.input_train, "train")
    dev_dataset = BertDataset(args.input_dev, "dev")
    test_dataset = BertDataset(args.input_test, "test")

    train_examples = len(train_dataset)

    train_dataloader = BertDataLoader(
        train_dataset, mode="train", max_len=args.max_len,
        batch_size=args.batch_size, num_workers=4, shuffle=True)
    dev_dataloader = BertDataLoader(
        dev_dataset, mode="dev", max_len=args.max_len,
        batch_size=args.batch_size, num_workers=4, shuffle=False)
    # Test batches are half the training batch size, but never below one
    # (a batch size of 1 would otherwise become 0).
    test_dataloader = BertDataLoader(
        test_dataset, mode="test", max_len=args.max_len,
        batch_size=max(1, args.batch_size // 2), num_workers=4, shuffle=False)

    trainer = Trainer(args, model, train_examples, use_gpu)

    # Same truthiness test as the checkpoint-loading branch above, so a run
    # either resumes from a checkpoint or trains - never neither.
    if not args.resume:
        logging.info("Beginning training...")
        trainer.train(train_dataloader, dev_dataloader)

    prediction, example_ids = trainer.predict(test_dataloader)

    with open(os.path.join(args.save_dir, "MG1833039.txt"), "w", encoding="utf-8") as f:
        for example_id, label in zip(example_ids, prediction):
            f.write("{}\t{}\n".format(example_id, label))

    logging.info("Done!")
开发者ID:tracy-talent,项目名称:curriculum,代码行数:63,代码来源:main.py

示例6: __init__

# 需要导入模块: from pytorch_pretrained_bert.modeling import BertForSequenceClassification [as 别名]
# 或者: from pytorch_pretrained_bert.modeling.BertForSequenceClassification import from_pretrained [as 别名]
def __init__(
        self,
        language=Language.ENGLISH,
        num_labels=2,
        cache_dir=".",
        use_distributed=False,
    ):
        """Create the BERT classifier and its optimizer parameter groups.

        Args:
            language: Language member whose ``.value`` is passed to
                ``from_pretrained`` to pick the pretrained model.
            num_labels: Number of unique labels in the training dataset.
            cache_dir: Cache directory used when loading the pretrained BERT
                model. Defaults to ".".
            use_distributed: When True, Horovod is initialised and, if CUDA is
                available, the current CUDA device is set to the Horovod
                local rank. Defaults to False.

        Raises:
            ValueError: If ``num_labels`` is smaller than 2.
        """
        if num_labels < 2:
            raise ValueError("Number of labels should be at least 2.")

        self.language = language
        self.num_labels = num_labels
        self.cache_dir = cache_dir
        self.use_distributed = use_distributed

        # Build the classifier from the pretrained checkpoint.
        self.model = BertForSequenceClassification.from_pretrained(
            language.value, cache_dir=cache_dir, num_labels=num_labels
        )

        # Split parameters into two optimizer groups: names containing any of
        # these tags get no explicit weight decay, all others get 0.01.
        no_decay_tags = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        decayed, undecayed = [], []
        for param_name, param in self.model.named_parameters():
            if any(tag in param_name for tag in no_decay_tags):
                undecayed.append(param)
            else:
                decayed.append(param)
        self.optimizer_params = [
            {"params": decayed, "weight_decay": 0.01},
            {"params": undecayed},
        ]

        # NOTE(review): named_parameters() returns a one-shot iterator, so
        # this attribute can be consumed only once - confirm callers expect that.
        self.name_parameters = self.model.named_parameters()
        self.state_dict = self.model.state_dict()

        if not use_distributed:
            return

        hvd.init()
        if torch.cuda.is_available():
            torch.cuda.set_device(hvd.local_rank())
        else:
            warnings.warn("No GPU available! Using CPU.")
开发者ID:microsoft,项目名称:nlp-recipes,代码行数:57,代码来源:sequence_classification_distributed.py


注:本文中的pytorch_pretrained_bert.modeling.BertForSequenceClassification.from_pretrained方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。