手写数字识别之多GPU训练
技能训练9: 数字技能训练:数据分析 #生活技巧# #学习技巧# #技能训练指南#
本文内容均来自百度 AI Studio 的课程,我只是在这里做个学习笔记,并非原创。
单GPU训练
从前几节的训练来看,我们无论是训练房价预测模型还是MNIST手写字符识别模型,训练好一个模型不会超过十分钟,主要原因是我们所使用的神经网络比较简单。但现实生活中,我们可能会遇到更复杂的机器学习、深度学习任务,需要运算速度更高的硬件(GPU、TPU),甚至同时使用多个机器共同训练一个任务(多卡训练和多机训练)。
飞桨动态图通过fluid.dygraph.guard(place=None)里的place参数,设置在GPU上训练还是CPU上训练,比如:
with fluid.dygraph.guard(place=fluid.CPUPlace()): #设置使用CPU资源训练神经网络。
with fluid.dygraph.guard(place=fluid.CUDAPlace(0)): #设置使用GPU资源训练神经网络,默认使用机器的第一个GPU。
import os
import random
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
import numpy as np
from PIL import Image
import gzip
import json
def load_data(mode='train', batch_size=100):
    """Build a batch generator over one split of the MNIST JSON archive.

    The archive ``./work/mnist.json.gz`` is decoded as three entries
    (training, validation, evaluation); element 0 of each entry is used as
    the images and element 1 as the labels.

    Args:
        mode: Which split to read: ``'train'``, ``'valid'`` or ``'eval'``.
            Only the ``'train'`` split is shuffled, once per generator pass.
        batch_size: Number of samples per yielded batch. Defaults to 100,
            the value that was previously hard-coded. The last batch of a
            pass may be smaller.

    Returns:
        A zero-argument generator function. Each item it yields is a pair
        ``(images, labels)`` of NumPy arrays: float32 images reshaped to
        ``(n, 1, 28, 28)`` and int64 labels reshaped to ``(n, 1)``.

    Raises:
        ValueError: If ``mode`` is not a known split, if ``batch_size`` is
            not positive, or if the selected split holds a different number
            of images and labels.
    """
    # Reject bad arguments up front instead of failing later with an
    # unrelated UnboundLocalError on ``imgs``.
    if mode not in ('train', 'valid', 'eval'):
        raise ValueError(
            "mode should be 'train', 'valid' or 'eval', got {!r}".format(mode))
    if batch_size <= 0:
        raise ValueError(
            "batch_size should be positive, got {!r}".format(batch_size))

    datafile = './work/mnist.json.gz'
    print('loading mnist dataset from {} ......'.format(datafile))
    # The context manager closes the gzip handle even if decoding fails.
    with gzip.open(datafile) as archive:
        data = json.load(archive)
    train_set, val_set, eval_set = data

    IMG_ROWS = 28
    IMG_COLS = 28

    splits = {'train': train_set, 'valid': val_set, 'eval': eval_set}
    selected = splits[mode]
    imgs = selected[0]
    labels = selected[1]

    imgs_length = len(imgs)
    # Explicit raise rather than assert so the check survives ``python -O``.
    if imgs_length != len(labels):
        raise ValueError(
            "length of train_imgs({}) should be the same as train_labels({})".format(
                len(imgs), len(labels)))

    index_list = list(range(imgs_length))

    def data_generator():
        # Reshuffle on every pass so each training epoch sees a new order.
        if mode == 'train':
            random.shuffle(index_list)
        imgs_list = []
        labels_list = []
        for i in index_list:
            img = np.reshape(imgs[i], [1, IMG_ROWS, IMG_COLS]).astype('float32')
            label = np.reshape(labels[i], [1]).astype('int64')
            imgs_list.append(img)
            labels_list.append(label)
            if len(imgs_list) == batch_size:
                yield np.array(imgs_list), np.array(labels_list)
                imgs_list = []
                labels_list = []
        # Emit whatever is left when the split size is not a multiple of
        # batch_size.
        if imgs_list:
            yield np.array(imgs_list), np.array(labels_list)

    return data_generator
class MNIST(fluid.dygraph.Layer):
    """Small convolutional classifier built from two conv/pool stages.

    Each stage is a 5x5 convolution (20 filters, stride 1, padding 2, ReLU)
    followed by 2x2 max pooling with stride 2. A fully connected layer of
    size 10 with softmax activation produces the output.
    """

    def __init__(self, name_scope):
        """Create the sub-layers under the scope derived from ``name_scope``."""
        super(MNIST, self).__init__(name_scope)
        scope = self.full_name()

        # Both convolution stages and both pooling stages share one config.
        conv_cfg = dict(num_filters=20, filter_size=5, stride=1, padding=2, act='relu')
        pool_cfg = dict(pool_size=2, pool_stride=2, pool_type='max')

        # Attribute names and creation order are kept as they were so that
        # the saved parameter names stay the same.
        self.conv1 = Conv2D(scope, **conv_cfg)
        self.pool1 = Pool2D(scope, **pool_cfg)
        self.conv2 = Conv2D(scope, **conv_cfg)
        self.pool2 = Pool2D(scope, **pool_cfg)
        self.fc = FC(scope, size=10, act='softmax')

    def forward(self, inputs):
        """Run ``inputs`` through conv1, pool1, conv2, pool2 and fc in order."""
        out = inputs
        for layer in (self.conv1, self.pool1, self.conv2, self.pool2, self.fc):
            out = layer(out)
        return out
# Pick the device: the first GPU when use_gpu is set, otherwise the CPU.
use_gpu = False
if use_gpu:
    place = fluid.CUDAPlace(0)
else:
    place = fluid.CPUPlace()

with fluid.dygraph.guard(place):
    # Build the network and switch it to training mode.
    model = MNIST("mnist")
    model.train()

    # load_data returns a generator function; it is called once per epoch.
    train_loader = load_data('train')

    # Plain SGD with a fixed learning rate.
    optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.01)

    EPOCH_NUM = 2
    for epoch_id in range(EPOCH_NUM):
        for batch_id, data in enumerate(train_loader()):
            # Wrap the NumPy batch as dygraph variables.
            image_data, label_data = data
            image = fluid.dygraph.to_variable(image_data)
            label = fluid.dygraph.to_variable(label_data)

            # Forward pass and mean cross-entropy loss over the batch.
            predict = model(image)
            loss = fluid.layers.cross_entropy(predict, label)
            avg_loss = fluid.layers.mean(loss)

            # Report the loss every 200 batches.
            if not batch_id % 200:
                print("epoch: {}, batch: {}, loss is: {}".format(epoch_id, batch_id, avg_loss.numpy()))

            # Backward pass, parameter update, then reset the gradients
            # before the next batch.
            avg_loss.backward()
            optimizer.minimize(avg_loss)
            model.clear_gradients()

    # Save the trained parameters under the 'mnist' prefix.
    fluid.save_dygraph(model.state_dict(), 'mnist')
实际运行后,可以明显感受到GPU和CPU之间的差别,训练耗时如下:
用GPU跑,每次都在20秒左右
用CPU跑,每次都在105秒左右
高下立见
网址:手写数字识别之多GPU训练 https://www.yuejiaxmz.com/news/view/219048
相关内容
智能生活垃圾检测与分类系统(UI界面+YOLOv5+训练数据集)gpu压力测试工具
视频语音识别文字
一种实时语音识别数字的方法
基于深度学习的生活垃圾检测与分类系统(网页版+YOLOv8/v7/v6/v5代码+训练数据集)
目标检测算法: 对Faster RCNN论文的理解与实践
什么是递增组训练?递减组训练和金字塔训练法
代谢训练(MT)=代谢阻力训练(MRT) 有氧间歇训练(CRT)
智能AI语音识别算法:实现高精度语音转文字技术解析
放松训练