Computer Vision 3: Classifying CIFAR-10 with VGG16
ChatGPT's implementation
import torch
import torch.nn as nn

class VGG16(nn.Module):
    def __init__(self, num_classes=1000):
        super(VGG16, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
# Create the VGG16 model
model = VGG16()
print(model)
for name, param in model.named_parameters():
    print(f"parameter {name} has shape {param.shape}")
An implementation from the web
Adapted from the 知乎 (zhihu.com) article "VGG16 训练 Cifar10 – 91.64%的成功率"; see that article for the full write-up.
# Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import time
import sys
import itertools
import random
import matplotlib.image as mpimg
from PIL import Image

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the dataset
# Normalize with the per-channel CIFAR-10 mean and standard deviation
transform_norm = transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010))
transform = transforms.Compose([transforms.ToTensor(), transform_norm])
trainset0 = torchvision.datasets.CIFAR10('./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10('./data', train=False, download=True, transform=transform)
print(type(trainset0[0]))
# trainset.targets holds the labels
# trainset.data holds the raw images with shape (50000, 32, 32, 3):
# 50,000 images of 32x32 pixels, each pixel an RGB triple of values in 0-255
classes = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
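To confirm the layout described in the comments above, a short sketch (my own addition, using the already-imported numpy/matplotlib) inspects the raw arrays and displays one sample:

# Peek at the raw arrays backing the dataset (shapes as described above)
print(trainset0.data.shape)        # (50000, 32, 32, 3), uint8 HWC images
print(len(trainset0.targets))      # 50000 integer labels in [0, 9]
# Display the first training image with its class name
img, label = trainset0.data[0], trainset0.targets[0]
plt.imshow(img)
plt.title(classes[label])
plt.show()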
# Hold out part of the training set as a validation set
class myDataset(torch.utils.data.Dataset):
    def __init__(self, trainset, choice, num_val, transform=None, transform_norm=None, train=True):
        # trainset is the full training set; choice is a shuffled index list and
        # num_val is how many of those indices go to the validation split
        self.transform = transform
        self.transform_norm = transform_norm
        self.train = train
        self.num_val = num_val
        self.choice = choice
        if self.train:
            self.images = trainset.data[self.choice[self.num_val:]].copy()
            self.labels = [trainset.targets[i] for i in self.choice[self.num_val:]]
        else:
            self.images = trainset.data[self.choice[:self.num_val]].copy()
            self.labels = [trainset.targets[i] for i in self.choice[:self.num_val]]

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        image, label = self.images[index], self.labels[index]
        image = transforms.ToTensor()(image)
        if self.transform:
            image = self.transform(image)
        if self.transform_norm:
            image = self.transform_norm(image)
        sample = (image, label)
        return sample
# Shuffle the original training indices
choice = list(range(len(trainset0)))
random.shuffle(choice)

# Data augmentation for training images
transform_plus = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomAffine(degrees=15, translate=(0.1, 0.1))
])

# Normalize the train/validation splits the same way as the test set
validset = myDataset(trainset0, choice, len(trainset0) // 10, None, transform_norm, False)
trainset = myDataset(trainset0, choice, len(trainset0) // 10, transform_plus, transform_norm, True)
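A quick check (my own sketch) confirms the 90/10 split and that __getitem__ returns a normalized (3, 32, 32) float tensor with an integer label:

# Verify the split: 45000 training images, 5000 validation images
print(len(trainset), len(validset))           # 45000 5000
# One sample from the training split
img, label = trainset[0]
print(img.shape, img.dtype, classes[label])   # torch.Size([3, 32, 32]) torch.float32 ...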
# Build the network
class vgg16_block(nn.Module):
    def __init__(self, in_channels, out_channels, rate=0.4, drop=True):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, 3, 1, 1)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(rate)
        self.drop = drop

    def forward(self, x):
        x = self.relu(self.bn(self.conv(x)))
        if self.drop:
            x = self.dropout(x)
        return x

def vgg16_layer(in_channels, out_channels, num, dropout=[0.4, 0.4]):
    # num conv blocks followed by a 2x2 max pool; the last block uses no dropout
    result = []
    result.append(vgg16_block(in_channels, out_channels, dropout[0]))
    for i in range(1, num - 1):
        result.append(vgg16_block(out_channels, out_channels, dropout[1]))
    if num > 1:
        result.append(vgg16_block(out_channels, out_channels, drop=False))
    result.append(nn.MaxPool2d(kernel_size=(2, 2)))
    return result

b1 = nn.Sequential(*vgg16_layer(3, 64, 2, [0.3, 0.4]), *vgg16_layer(64, 128, 2),
                   *vgg16_layer(128, 256, 3), *vgg16_layer(256, 512, 3), *vgg16_layer(512, 512, 3))
b2 = nn.Sequential(nn.Dropout(0.5), nn.Flatten(), nn.Linear(512, 512, bias=True),
                   nn.BatchNorm1d(512), nn.ReLU(inplace=True), nn.Linear(512, 10, bias=True))
net = nn.Sequential(b1, b2)
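Because the five pooling stages shrink a 32×32 input down to 1×1, the flattened feature vector has exactly 512 entries, which is why b2 starts with nn.Linear(512, 512). A quick sanity check with a dummy CIFAR-sized batch (a sketch, run before the network is moved to the GPU):

# 32 -> 16 -> 8 -> 4 -> 2 -> 1 after five 2x2 poolings, so b1 outputs (N, 512, 1, 1)
dummy = torch.randn(4, 3, 32, 32)    # hypothetical CIFAR-sized batch
print(b1(dummy).shape)               # torch.Size([4, 512, 1, 1])
print(net(dummy).shape)              # torch.Size([4, 10])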
# Evaluation on the validation set
def test(net, validloader):
    test_loss = 0
    test_accuracy = 0
    # Disable BatchNorm updates and Dropout
    net.eval()
    with torch.no_grad():
        for data in validloader:
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = net(inputs)
            test_loss += loss_fn(outputs, labels).item() * len(labels)
            test_accuracy += (torch.max(outputs.data, 1)[1] == labels).sum()
    # Average over the number of samples rather than the number of batches
    num_val = len(validloader.dataset)
    return (test_loss / num_val, test_accuracy / num_val * 100)
# Learning rate, optimizer, and scheduler
net.to(device)
net.train()
epoch_num = 150
batch_num = 128
learning_rate = 0.1
train_num = len(trainset) // batch_num
los = []
cor = []
train_los = []
train_cor = []
net_corr, net_los, net_train_los, net_train_corr, net_lr, net_epoch = 0, 0, 0, 0, 0, 0
loss_fn = nn.CrossEntropyLoss()
opt = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, weight_decay=1e-6, nesterov=True)
scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(opt, T_0=22, T_mult=2)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_num, shuffle=True)
validloader = torch.utils.data.DataLoader(validset, batch_size=batch_num, shuffle=True)
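CosineAnnealingWarmRestarts with T_0=22 and T_mult=2 gives cosine cycles of 22, 44, and 88 epochs, so the learning rate jumps back to 0.1 at epochs 22 and 66 and then decays over the rest of training. The following sketch previews that schedule on a throwaway optimizer so the real opt and scheduler are left untouched:

# Preview the warm-restart schedule without touching the real optimizer state
_params = [torch.nn.Parameter(torch.zeros(1))]
_opt = optim.SGD(_params, lr=learning_rate)
_sched = optim.lr_scheduler.CosineAnnealingWarmRestarts(_opt, T_0=22, T_mult=2)
for e in range(epoch_num):
    if e % 10 == 0 or e in (22, 66):
        print("epoch {:3d}  lr {:.4f}".format(e, _opt.param_groups[0]["lr"]))
    _sched.step()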
# Training loop
for epoch in range(epoch_num):
    loss_avg = 0
    train_time = 0
    correct = 0
    num_img = 0
    for data in trainloader:
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)
        net.train()
        outputs = net(inputs)
        loss = loss_fn(outputs, labels)
        opt.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), 20)
        opt.step()
        train_time += 1
        loss_avg += loss.item() * len(labels)
        predict = torch.max(outputs.data, 1)[1]
        correct += predict.eq(labels).sum()
        num_img += len(labels)
        # In-place progress indicator, overwritten every batch
        print("\rEpoch: {} ".format(epoch + 1), end="")
    scheduler.step()
    print("\r", end="")
    # Validation results for this epoch
    los2, cor2 = test(net, validloader)
    print("Epoch {}/{}: lr {:.4f}, train loss {:.2f}, train accuracy {:.2f}%, validation loss {:.2f}, validation accuracy {:.2f}%".format(
        epoch + 1, epoch_num, opt.state_dict()["param_groups"][0]["lr"],
        loss_avg / num_img, correct / num_img * 100, los2, cor2.item()))
    los.append(los2)
    cor.append(cor2)
    train_cor.append(correct / num_img * 100)
    train_los.append(loss_avg / num_img)
    if net_corr < cor2:
        net_corr, net_los, net_train_los, net_train_corr, net_lr, net_epoch = \
            cor2, los2, loss_avg / num_img, correct / num_img, opt.state_dict()['param_groups'][0]['lr'], epoch + 1
        torch.save(net, 'net_model.pkl')

print('Best model at epoch {}: lr {:.8f}, train loss {:.4f}, train accuracy {:.2f}%, validation loss {:.4f}, validation accuracy {:.2f}%'.format(
    net_epoch, net_lr, net_train_los, net_train_corr * 100, net_los, net_corr))
# Evaluate the best saved model on the test set
# (on PyTorch >= 2.6, loading a full pickled model may require weights_only=False)
best_net = torch.load('net_model.pkl')
testloader = torch.utils.data.DataLoader(testset, batch_size=100)
test_loss = 0
test_correct = 0
best_net.eval()
with torch.no_grad():
    for data in testloader:
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = best_net(inputs)
        test_loss += loss_fn(outputs, labels).item() * len(labels)
        test_correct += (torch.max(outputs.data, 1)[1] == labels).sum()
print('Tested {} images: average loss {:.2f}, accuracy {:.2f}%'.format(
    len(testset.data), test_loss / len(testset.data), test_correct / len(testset.data) * 100))
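Beyond the single accuracy number, a per-class breakdown (an extra diagnostic I added, not part of the referenced article) shows which CIFAR-10 categories the model confuses most:

# Per-class accuracy on the test set (illustrative add-on)
class_correct = [0] * 10
class_total = [0] * 10
with torch.no_grad():
    for inputs, labels in testloader:
        inputs, labels = inputs.to(device), labels.to(device)
        preds = best_net(inputs).argmax(dim=1)
        for p, l in zip(preds, labels):
            class_correct[l.item()] += int(p.item() == l.item())
            class_total[l.item()] += 1
for name, c, t in zip(classes, class_correct, class_total):
    print("{}: {:.2f}%".format(name, 100.0 * c / t))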
The network above uses 13 convolutional layers and 2 fully connected layers, 15 weight layers in total. Simply stacking ever more layers does not keep helping: very deep plain networks become hard to train and start to overfit or degrade, which is the problem ResNet was introduced to address.
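ResNet's key idea is the identity shortcut: each block learns a residual F(x) and outputs F(x) + x, which keeps gradients flowing through much deeper stacks. A minimal sketch of a basic residual block (my own illustration, not code from the article):

# Minimal basic residual block in the spirit of ResNet (illustrative sketch)
class BasicBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, 3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, 3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        return self.relu(out + x)   # identity shortcut: output = F(x) + x

block = BasicBlock(64)
print(block(torch.randn(1, 64, 8, 8)).shape)   # torch.Size([1, 64, 8, 8])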