Notes
Introduction
First, the caffe used here is a modified caffe; see my previous post for how the modification is done.
The example here recognizes the 3 characters (0-9, a-Z) contained in an image, using a modified version of the mnist network.
The data looks like this: 56x56 images, each containing three characters, with the characters themselves used as the file name.
When training with multiple labels using this modified caffe, you need to add a parameter called label_dim to the data layer; its value is the number of labels. See the network file below for an example.
Data
The training data is generated with my own Python code, shown below:
#!/usr/bin/env python
#coding=utf-8
import random
import Image, ImageDraw, ImageFont, ImageFilter

_letter_cases = "abcdefghjkmnpqrstuvwxy"  # lowercase letters, with the easily confused i, l, o, z removed
_upper_cases = _letter_cases.upper()  # uppercase letters
_numbers = ''.join(map(str, range(3, 10)))  # digits 3-9
init_chars = ''.join((_letter_cases, _upper_cases, _numbers))
fontType = "/usr/share/fonts/truetype/freefont/FreeSans.ttf"

def create_validate_code(size=(56, 56),
                         chars=init_chars,
                         img_type="GIF",
                         mode="RGB",
                         bg_color=(255, 255, 255),
                         fg_color=(0, 0, 0),
                         font_size=18,
                         font_type=fontType,
                         length=3,
                         draw_lines=False,
                         n_line=(1, 2),
                         draw_points=False,
                         point_chance=2):
    '''
    @todo: generate a captcha image
    @param size: image size as (width, height), default (56, 56)
    @param chars: allowed character set, given as a string
    @param img_type: image format, default GIF; GIF, JPEG, TIFF and PNG are possible
    @param mode: image mode, default RGB
    @param bg_color: background color, default white
    @param fg_color: foreground (character) color, default black
    @param font_size: captcha font size
    @param font_type: captcha font, default FreeSans.ttf
    @param length: number of characters in the captcha
    @param n_line: range of the number of interference lines, as a tuple, default (1, 2); only used when draw_lines is True
    @param draw_lines: whether to draw interference lines
    @param draw_points: whether to draw interference points
    @param point_chance: probability of an interference point, in the range [0, 100]
    @return: [0]: PIL Image instance
    @return: [1]: the string of characters shown in the captcha image
    '''
    width, height = size  # width, height
    img = Image.new(mode, size, bg_color)  # create the image
    draw = ImageDraw.Draw(img)  # create the drawing context
    if draw_lines:
        create_lines(draw, n_line, width, height)
    if draw_points:
        create_points(draw, point_chance, width, height)
    strs = create_strs(draw, chars, length, font_type, font_size, width, height, fg_color)
    # perspective distortion parameters
    params = [1 - float(random.randint(1, 2)) / 100,
              0,
              0,
              0,
              1 - float(random.randint(1, 10)) / 100,
              float(random.randint(1, 2)) / 500,
              0.001,
              float(random.randint(1, 2)) / 500
              ]
    img = img.transform(size, Image.PERSPECTIVE, params)  # apply the distortion
    img = img.filter(ImageFilter.EDGE_ENHANCE_MORE)  # filter: stronger edge enhancement
    return img, strs

def create_lines(draw, n_line, width, height):
    '''draw interference lines'''
    line_num = random.randint(n_line[0], n_line[1])  # number of interference lines
    for i in range(line_num):
        # start point
        begin = (random.randint(0, width), random.randint(0, height))
        # end point
        end = (random.randint(0, width), random.randint(0, height))
        draw.line([begin, end], fill=(0, 0, 0))

def create_points(draw, point_chance, width, height):
    '''draw interference points'''
    chance = min(100, max(0, int(point_chance)))  # clamp to [0, 100]
    for w in xrange(width):
        for h in xrange(height):
            tmp = random.randint(0, 100)
            if tmp > 100 - chance:
                draw.point((w, h), fill=(0, 0, 0))

def create_strs(draw, chars, length, font_type, font_size, width, height, fg_color):
    '''draw the captcha characters'''
    '''sample a string of the given length from the character set'''
    c_chars = random.sample(chars, length)
    strs = ' %s ' % ' '.join(c_chars)  # separate the characters with spaces
    font = ImageFont.truetype(font_type, font_size)
    font_width, font_height = font.getsize(strs)
    draw.text(((width - font_width) / 3, (height - font_height) / 3), strs, font=font, fill=fg_color)
    return ''.join(c_chars)

if __name__ == "__main__":
    for i in xrange(10000):
        code_img = create_validate_code()
        code_img[0].save('test56/' + code_img[1] + '.jpeg', "jpeg")
        print code_img[1], i
With a small change to this code you can generate both the training set and the test set; I generated 50,000 training images and 10,000 test images. Next, the list files are generated with the code below.
Generating the list file
import os
import os.path

rootdir = 'train56'
file_object = open('train56.txt', 'w+')
# map each character to its class index: digits 0-9 -> 0-9, a-z -> 10-35, A-Z -> 36-61
# (this must stay consistent with the index-to-character list used at prediction time)
maps = {'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9,
        'a': 10, 'b': 11, 'c': 12, 'd': 13, 'e': 14, 'f': 15, 'g': 16, 'h': 17, 'i': 18, 'j': 19, 'k': 20, 'l': 21, 'm': 22,
        'n': 23, 'o': 24, 'p': 25, 'q': 26, 'r': 27, 's': 28, 't': 29, 'u': 30, 'v': 31, 'w': 32, 'x': 33, 'y': 34, 'z': 35,
        'A': 36, 'B': 37, 'C': 38, 'D': 39, 'E': 40, 'F': 41, 'G': 42, 'H': 43, 'I': 44, 'J': 45, 'K': 46, 'L': 47, 'M': 48,
        'N': 49, 'O': 50, 'P': 51, 'Q': 52, 'R': 53, 'S': 54, 'T': 55, 'U': 56, 'V': 57, 'W': 58, 'X': 59, 'Y': 60, 'Z': 61
        }
for parent, dirnames, filenames in os.walk(rootdir):
    for d in dirnames:
        print d
    for x in filenames:
        file_object.write('train56/')
        file_object.write(x)
        file_object.write(' ')
        tip = x.split(".")[0]  # the file name is the 3-character ground truth
        temp = tip[0]
        file_object.write(str(maps[temp]))
        file_object.write(' ')
        temp = tip[1]
        file_object.write(str(maps[temp]))
        file_object.write(' ')
        temp = tip[2]
        file_object.write(str(maps[temp]))
        file_object.write('\n')
file_object.close()
Again, a small change to this code produces the list files for both the training set and the test set.
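For reference, each line that this script writes pairs an image path with label_dim integer labels, one per character. A file named 3BE.jpeg, for example, would end up as the following entry (constructed by hand from the mapping above):

train56/3BE.jpeg 3 37 40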
Network definition file (train_val56.prototxt)
name: "JWNet"
layer {
name: "data"
type: "ImageData"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
mirror: true
scale: 0.00390625
}
image_data_param {
source: "/home/jiangwei/PycharmProjects/code/train56.txt"
batch_size: 1
root_folder: "/home/jiangwei/PycharmProjects/code/"
label_dim:3
}
}
layer {
name: "data"
type: "ImageData"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
mirror: false
scale: 0.00390625
}
image_data_param {
source: "/home/jiangwei/PycharmProjects/code/test56.txt"
batch_size: 1
root_folder: "/home/jiangwei/PycharmProjects/code/"
label_dim:3
}
}
layer {
  name: "slice"
  type: "Slice"
  bottom: "label"
  top: "one"
  top: "two"
  top: "three"
  slice_param {
    axis: 1
    slice_point: 1  # n labels need n-1 slice_points
    slice_point: 2
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
layer {
  name: "ione"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ione"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 62
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "itwo"
  type: "InnerProduct"
  bottom: "ip1"
  top: "itwo"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 62
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "ithree"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ithree"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 62
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "accuracy_one"
  type: "Accuracy"
  bottom: "ione"
  bottom: "one"
  top: "accuracy_one"
  include {
    phase: TEST
  }
}
layer {
  name: "loss_one"
  type: "SoftmaxWithLoss"
  bottom: "ione"
  bottom: "one"
  top: "loss_one"
  loss_weight: 0.5
}
layer {
  name: "accuracy_two"
  type: "Accuracy"
  bottom: "itwo"
  bottom: "two"
  top: "accuracy_two"
  include {
    phase: TEST
  }
}
layer {
  name: "loss_two"
  type: "SoftmaxWithLoss"
  bottom: "itwo"
  bottom: "two"
  top: "loss_two"
  loss_weight: 0.5
}
layer {
  name: "accuracy_three"
  type: "Accuracy"
  bottom: "ithree"
  bottom: "three"
  top: "accuracy_three"
  include {
    phase: TEST
  }
}
layer {
  name: "loss_three"
  type: "SoftmaxWithLoss"
  bottom: "ithree"
  bottom: "three"
  top: "loss_three"
  loss_weight: 0.5
}
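Before training, a quick shape check with pycaffe helps confirm that the Slice layer really splits the 3-dimensional label blob into three single-label tops. The snippet below is only a sketch; it assumes pycaffe is importable and that the source list files referenced in the prototxt exist:

import caffe

caffe.set_mode_cpu()
# build the TEST-phase network defined above; no weights are needed for a shape check
net = caffe.Net('train_val56.prototxt', caffe.TEST)
net.forward()  # loads one image and its three labels from test56.txt
for blob_name in ('label', 'one', 'two', 'three'):
    print('%s: %s' % (blob_name, net.blobs[blob_name].data.shape))

With batch_size: 1 and label_dim: 3 the label blob should come out as (1, 3) and each of one, two and three as (1, 1).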
Solver file (solver56.prototxt)
net:"train_val56.prototxt"
test_iter: 1000
test_interval: 5000
base_lr: 0.0025
momentum: 0.0
weight_decay: 0.0005
lr_policy: "inv"
gamma: 0.0001
power: 0.75
display: 100
max_iter: 100000
snapshot: 20000
snapshot_prefix: "56model"
solver_mode: GPU
type: "RMSProp"
rms_decay: 0.98
Training
Run the following directly in a terminal:
/home/jiangwei/bin/caffe-master-duo/build/tools/caffe train --solver=solver56.prototxt
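If you prefer to stay in Python, the same training run can also be driven through pycaffe. This is only a sketch; it assumes the pycaffe module from the modified build is on PYTHONPATH and that solver56.prototxt sits in the current directory:

import caffe

caffe.set_mode_gpu()
# get_solver reads the solver type (RMSProp here) from the solver prototxt
solver = caffe.get_solver('solver56.prototxt')
solver.solve()  # runs the full max_iter schedule and writes snapshots as configured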
Calling from Python
Use Python to run the trained model:
#coding=utf-8
import caffe
import numpy as np

develop = '/home/jiangwei/PycharmProjects/code/develop56.prototxt'
model = '/home/jiangwei/PycharmProjects/code/56model_iter_100000.caffemodel'
net = caffe.Net(develop, model, caffe.TEST)  # load the network definition and the trained model

# image preprocessing
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})  # the data blob shape is (1, 3, 56, 56)
transformer.set_transpose('data', (2, 0, 1))  # reorder dimensions from (56, 56, 3) to (3, 56, 56)
# transformer.set_mean('data', np.load(mean_file).mean(1).mean(1))  # no mean was subtracted during training, so none is subtracted here
transformer.set_raw_scale('data', 1)  # keep pixel values in [0, 1], matching the 0.00390625 scaling used in training
transformer.set_channel_swap('data', (2, 1, 0))  # swap channels from RGB to BGR

img = '/home/jiangwei/PycharmProjects/code/test56/3BE.jpeg'
im = caffe.io.load_image(img)  # load the image
net.blobs['data'].data[...] = transformer.preprocess('data', im)  # apply the preprocessing above and copy the image into the data blob

# run the forward pass
out = net.forward()
onenum = net.blobs['onenum'].data[0].flatten()  # Softmax class probabilities for the first character (blob names as defined in develop56.prototxt)
twonum = net.blobs['twonum'].data[0].flatten()  # Softmax class probabilities for the second character
threenum = net.blobs['threenum'].data[0].flatten()  # Softmax class probabilities for the third character
order1 = onenum.argsort()[-1]  # index of the largest probability
order2 = twonum.argsort()[-1]
order3 = threenum.argsort()[-1]
re = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
      'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
      'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
      'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
      'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
print re[order1], re[order2], re[order3]
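To get a recognition rate over more than a single image, a rough evaluation loop can be appended to the script above. This is only a sketch: it reuses the net, transformer and re objects already defined, and assumes the test images live in the test56 directory used earlier in this post:

import os

test_dir = '/home/jiangwei/PycharmProjects/code/test56'
correct, total = 0, 0
for fname in os.listdir(test_dir):
    if not fname.endswith('.jpeg'):
        continue
    truth = fname.split('.')[0]  # the file name is the 3-character ground truth
    im = caffe.io.load_image(os.path.join(test_dir, fname))
    net.blobs['data'].data[...] = transformer.preprocess('data', im)
    net.forward()
    pred = ''.join(re[net.blobs[b].data[0].flatten().argmax()]
                   for b in ('onenum', 'twonum', 'threenum'))
    correct += sum(p == t for p, t in zip(pred, truth))
    total += len(truth)
print('per-character accuracy: %.4f' % (correct / float(total)))

Running the whole test set through exactly the same preprocessing and decoding path is the quickest way to see whether the gap between training accuracy and the final script comes from the Python-side preprocessing, as the summary below suspects.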
Summary
Overall the recognition results are not ideal, but they do show that multi-label training in caffe works. Accuracy during training reaches about 0.7, yet the recognition rate from the final script is not very high, which in theory should not happen; the image preprocessing on the Python side may be the culprit, and I have not tracked it down.
If you have time to tune the network, the recognition rate should be decent. This project can be used for captcha recognition: with some parameter tuning and good training data, I think it could handle 90% of today's image captchas.