我一直在尝试通过源代码来训练一个漏洞检测模型。而且,经过一点搜索,我认为一个很好的起点可能是使用来自HuggingFace和PyTorch和pl.lightning火炬的预培训变压器模型。我选择DistilBert是因为它是最快的。
我有一个不平衡的数据集,大约70%的非易受攻击和30%的易受攻击功能.
然而,我的成绩一直很差。这个模型似乎没有学习和推广。具体来说,在训练期间,列车损失剧烈振荡,准确率约为70 %,召回率极低(这意味着模型总是预测一个标签)。
我想知道我的代码是否存在明显的问题。这是我第一次使用预先训练过的模型和闪电,我不知道可能出了什么问题。
class Model(pl.LightningModule):
def __init__(self, n_classes, n_training_steps, n_warmup_steps, lr, fine_tune=False):
super().__init__()
self.save_hyperparameters()
self.bert = DistilBert.from_pretrained(BERT_MODEL_NAME, return_dict=True)
for name, param in self.bert.named_parameters():
param.requires_grad = False
self.classifier = nn.Linear(self.bert.config.hidden_size, self.hparams.n_classes)
self.criterion = nn.BCELoss()
def finetune(self):
self.fine_tune = True
for name, param in self.bert.named_parameters():
if 'layer.5' in name:
param.requires_grad = True
def forward(self, input_ids, attention_mask, labels=None):
x = self.bert(input_ids, attention_mask=attention_mask)
x = x.last_hidden_state[:,0,:]
x = self.classifier(x)
x = torch.sigmoid(x)
x = x.squeeze(dim=-1)
loss = 0
if labels is not None:
loss = self.criterion(x, labels.float())
return loss, x
def training_step(self, batch, batch_idx):
enc, labels = batch
input_ids, attention_mask = enc
loss, outputs = self.forward(input_ids, attention_mask, labels)
self.log("train_loss", loss, prog_bar=True, logger=True)
return {'loss': loss, 'predictions': outputs, 'labels': labels}
def validation_step(self, batch, batch_idx):
enc, labels = batch
input_ids, attention_mask = enc
loss, outputs = self.forward(input_ids, attention_mask, labels)
r = recall(outputs[:], labels[:])
self.log("val_loss", loss, prog_bar=True, logger=True)
self.log("val_recall", r, prog_bar=True, logger=True)
return {'loss': loss, 'predictions': outputs, 'labels': labels}
def test_step(self, batch, batch_idx):
enc, labels = batch
input_ids, attention_mask = enc
loss, outputs = self.forward(input_ids, attention_mask, labels)
self.log("test_loss", loss, prog_bar=True, logger=True)
return {'loss': loss, 'predictions': outputs, 'labels': labels}
def training_epoch_end(self, outputs):
labels = []
predictions = []
for o in outputs:
for o_labels in o['labels'].detach().cpu():
labels.append(o_labels)
for o_preds in o['predictions'].detach().cpu():
predictions.append(o_preds)
labels = torch.stack(labels).int()
predictions = torch.stack(predictions)
class_recall = recall(predictions[:], labels[:])
self.logger.experiment.add_scalar("recall/Train", class_recall, self.current_epoch)
def validation_epoch_end(self, outputs):
labels = []
predictions = []
for o in outputs:
for o_labels in o['labels'].detach().cpu():
labels.append(o_labels)
for o_preds in o['predictions'].detach().cpu():
predictions.append(o_preds)
labels = torch.stack(labels).int()
predictions = torch.stack(predictions)
class_recall = recall(predictions[:], labels[:])
self.logger.experiment.add_scalar("recall/Validation", class_recall, self.current_epoch)
def test_epoch_end(self, outputs):
labels = []
predictions = []
for o in outputs:
for o_labels in o['labels'].detach().cpu():
labels.append(o_labels)
for o_preds in o['predictions'].detach().cpu():
predictions.append(o_preds)
labels = torch.stack(labels).int()
predictions = torch.stack(predictions)
class_recall = recall(predictions[:], labels[:])
self.logger.experiment.add_scalar("recall/Test", class_recall, self.current_epoch)
def configure_optimizers(self):
optimizer = AdamW(self.parameters(), lr=self.hparams.lr if self.hparams.fine_tune == False else self.hparams.lr // 100)
scheduler = get_linear_schedule_with_warmup(
optimizer,
num_warmup_steps=self.hparams.n_warmup_steps,
num_training_steps=self.hparams.n_training_steps
)
return dict(
optimizer=optimizer,
lr_scheduler=dict(
scheduler=scheduler,
interval='step'
)
)
if __name__ == "__main__":
data_module = SourceCodeDataModule(batch_size=BATCH_SIZE)
steps_per_epoch = len(train_loader) // BATCH_SIZE
total_training_steps = steps_per_epoch * N_EPOCHS
warmup_steps = total_training_steps // 5
model = Model(
n_classes=1,
n_warmup_steps = warmup_steps,
n_training_steps=total_training_steps,
lr=2e-5
)
logger = TensorBoardLogger("lightning_logs", name="bert_predictor")
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=2)
trainer = pl.Trainer(
logger=logger,
checkpoint_callback=checkpoint_callback,
callbacks=[early_stopping_callback],
max_epochs=N_EPOCHS,
gpus=1 if str(device).startswith('cuda') else 0,
progress_bar_refresh_rate=30
)
# First just train the final layer.
trainer.fit(model, datamodule=data_module)
result = trainer.test(model, datamodule=data_module)
print(f"Result when training classifier only: {result}")
# Then train the whole model
model = Model.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
model.finetune()
trainer.fit(model, datamodule=data_module)
result = trainer.test(model, datamodule=data_module)
print(f"Result when fine tuning: {result}")发布于 2022-05-11 21:07:00
这里,
def finetune(self):
self.fine_tune = True
for name, param in self.bert.named_parameters():
if 'layer.5' in name:
param.requires_grad = True试着解冻更多的层在神经网络的末尾,也许权重已经饱和,学习不足。此外,注意您正在使用的损失,以及输出的激活函数。
https://stackoverflow.com/questions/72207543
复制相似问题