Learning source: https://blog.csdn.net/minfanphd/article/details/116974889
1. initOperators initializes several operators. Note that they depend on member variables that have already been initialized.
2. ALPHA and LAMBDA are hyperparameters that can be set by the user.
3. setup initializes the whole network.
4. forward and backPropagation follow the same idea as in an ANN, but the actual operations differ.
5. updateParameters is called only once after a whole batch of data has gone through forward and backPropagation (see the training sketch after the listing).
6. Partial code.
package machinelearning.cnn;

import java.util.Arrays;
import machinelearning.cnn.Dataset.Instance;
import machinelearning.cnn.MathUtils.Operator;

/**
 * The full CNN.
 *
 * @time 2022/6/26
 * @author Liang Huang
 */
public class FullCnn {
/**
 * The learning rate. The value may change during training.
 */
private static double ALPHA = 0.85;
/**
 * The regularization coefficient (a constant).
 */
public static double LAMBDA = 0;
/**
* Manage layers.
*/
private static LayerBuilder layerBuilder;
/**
* Train using a number of instances simultaneously.
*/
private int batchSize;
/**
 * Divide the given value by the batch size.
 */
private Operator divideBatchSize;
/**
 * Multiply the given value by ALPHA.
 */
private Operator multiplyAlpha;
/**
 * Scale the given value by (1 - LAMBDA * ALPHA), i.e., apply weight decay.
 */
private Operator multiplyLambda;
/**
***********************
* The first constructor.
*
***********************
*/
public FullCnn(LayerBuilder paraLayerBuilder, int paraBatchSize) {
layerBuilder = paraLayerBuilder;
batchSize = paraBatchSize;
setup();
initOperators();
}// Of the first constructor
/**
***********************
* Initialize operators using temporary classes.
***********************
*/
private void initOperators() {
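// These three operators are presumably applied in updateParameters (omitted from
// this partial listing): the gradients accumulated over a batch are averaged by
// divideBatchSize, scaled by the learning rate through multiplyAlpha, and the old
// weights are shrunk through multiplyLambda, which amounts to weight decay.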
divideBatchSize = new Operator() {
private static final long serialVersionUID = 7424011281732651055L;
@Override
public double process(double value) {
return value / batchSize;
}// Of process
};
multiplyAlpha = new Operator() {
private static final long serialVersionUID = 5761368499808006552L;
@Override
public double process(double value) {
return value * ALPHA;
}// Of process
};
multiplyLambda = new Operator() {
private static final long serialVersionUID = 4499087728362870577L;
@Override
public double process(double value) {
return value * (1 - LAMBDA * ALPHA);
}// Of process
};
}// Of initOperators
/**
***********************
* Setup according to the layer builder.
***********************
*/
public void setup() {
CnnLayer tempInputLayer = layerBuilder.getLayer(0);
tempInputLayer.initOutMaps(batchSize);
for (int i = 1; i < layerBuilder.getNumLayers(); i++) {
CnnLayer tempLayer = layerBuilder.getLayer(i);
CnnLayer tempFrontLayer = layerBuilder.getLayer(i - 1);
int tempFrontMapNum = tempFrontLayer.getOutMapNum();
switch (tempLayer.getType()) {
case INPUT:
// Should not be input. Maybe an error should be thrown out.
break;
case CONVOLUTION:
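// Presumably subtract(kernelSize, 1) yields (front size - kernel size + 1) in
// each dimension, the standard valid-convolution size; e.g., a 28 x 28 map
// convolved with a 5 x 5 kernel gives a 24 x 24 map.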
tempLayer.setMapSize(
tempFrontLayer.getMapSize().subtract(tempLayer.getKernelSize(), 1));
tempLayer.initKernel(tempFrontMapNum);
tempLayer.initBias();
tempLayer.initErrors(batchSize);
tempLayer.initOutMaps(batchSize);
break;
case SAMPLING:
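// A sampling (pooling) layer keeps the number of maps and shrinks each map by
// the scale size; e.g., a 24 x 24 map with a 2 x 2 scale gives a 12 x 12 map.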
tempLayer.setOutMapNum(tempFrontMapNum);
tempLayer.setMapSize(tempFrontLayer.getMapSize().divide(tempLayer.getScaleSize()));
tempLayer.initErrors(batchSize);
tempLayer.initOutMaps(batchSize);
break;
case OUTPUT:
tempLayer.initOutputKernel(tempFrontMapNum, tempFrontLayer.getMapSize());
tempLayer.initBias();
tempLayer.initErrors(batchSize);
tempLayer.initOutMaps(batchSize);
break;
}// Of switch
} // Of for i
}// Of setup
/**
***********************
* Forward computing.
***********************
*/
private void forward(Instance instance) {
setInputLayerOutput(instance);
for (int l = 1; l < layerBuilder.getNumLayers(); l++) {
CnnLayer tempCurrentLayer = layerBuilder.getLayer(l);
CnnLayer tempLastLayer = layerBuilder.getLayer(l - 1);
switch (tempCurrentLayer.getType()) {
case CONVOLUTION:
setConvolutionOutput(tempCurrentLayer, tempLastLayer);
break;
case SAMPLING:
setSampOutput(tempCurrentLayer, tempLastLayer);
break;
case OUTPUT:
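// The output layer is presumably treated as a convolution layer whose kernel
// size equals the previous layer's map size (see initOutputKernel in setup),
// so each output map reduces to a single value.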
setConvolutionOutput(tempCurrentLayer, tempLastLayer);
break;
default:
break;
}// Of switch
} // Of for l
}// Of forward
/**
***********************
* Set the input layer's output. Given a record, copy its values to the input
* map.
***********************
*/
private void setInputLayerOutput(Instance paraRecord) {
CnnLayer tempInputLayer = layerBuilder.getLayer(0);
Size tempMapSize = tempInputLayer.getMapSize();
double[] tempAttributes = paraRecord.getAttributes();
if (tempAttributes.length != tempMapSize.width * tempMapSize.height)
throw new RuntimeException("input record does not match the map size.");
for (int i = 0; i < tempMapSize.width; i++) {
for (int j = 0; j < tempMapSize.height; j++) {
tempInputLayer.setMapValue(0, i, j, tempAttributes[tempMapSize.height * i + j]);
} // Of for j
} // Of for i
}// Of setInputLayerOutput
/**
***********************
* Compute the convolution output according to the output of the last layer.
*
* @param paraLayer the current layer.
* @param paraLastLayer the last layer.
***********************
*/
private void setConvolutionOutput(final CnnLayer paraLayer, final CnnLayer paraLastLayer) {
// int mapNum = paraLayer.getOutMapNum();
final int lastMapNum = paraLastLayer.getOutMapNum();
// Attention: paraLayer.getOutMapNum() may not be right.
for (int j = 0; j < paraLayer.getOutMapNum(); j++) {
double[][] tempSumMatrix = null;
for (int i = 0; i < lastMapNum; i++) {
double[][] lastMap = paraLastLayer.getMap(i);
double[][] kernel = paraLayer.getKernel(i, j);
if (tempSumMatrix == null) {
// On the first map.
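// MathUtils.convnValid presumably computes a valid (no padding) convolution:
// for an n1 x n2 map and a k1 x k2 kernel the result is
// (n1 - k1 + 1) x (n2 - k2 + 1), each entry being the sum of the element-wise
// products over the window the kernel covers.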
tempSumMatrix = MathUtils.convnValid(lastMap, kernel);
} else {
// Sum up convolution maps
tempSumMatrix = MathUtils.matrixOp(MathUtils.convnValid(lastMap, kernel),
tempSumMatrix, null, null, MathUtils.plus);
} // Of if
} // Of for i
// Activation.
final double bias = paraLayer.getBias(j);
tempSumMatrix = MathUtils.matrixOp(tempSumMatrix, new Operator() {
private static final long serialVersionUID = 2469461972825890810L;
@Override
public double process(double value) {
return MathUtils.sigmod(value + bias);
}
});
paraLayer.setMapValue(j, tempSumMatrix);
} // Of for j
}// Of setConvolutionOutput
/**
***********************
* Compute the sampling (pooling) output according to the output of the last layer.
*
* @param paraLayer the current layer.
* @param paraLastLayer the last layer.
***********************
*/
private void setSampOutput(final CnnLayer paraLayer, final CnnLayer paraLastLayer) {
// int tempLastMapNum = paraLastLayer.getOutMapNum();
// Attention: paraLayer.outMapNum may not be right.
for (int i = 0; i < paraLayer.outMapNum; i++) {
double[][] lastMap = paraLastLayer.getMap(i);
Size scaleSize = paraLayer.getScaleSize();
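// MathUtils.scaleMatrix presumably performs mean pooling: the map is split into
// non-overlapping scaleSize blocks and each block is replaced by its average.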
double[][] sampMatrix = MathUtils.scaleMatrix(lastMap, scaleSize);
paraLayer.setMapValue(i, sampMatrix);
} // Of for i
}// Of setSampOutput
/**
***********************
* The main entrance.
***********************
*/
public static void main(String[] args) {
// The test code is omitted in this partial listing.
}// Of main
}// Of class FullCnn
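Note 5 refers to training code that this partial listing omits. The following is a minimal sketch, not the author's exact code, of how one training round inside FullCnn might look. It assumes a Dataset class with size() and getInstance(int), and that the omitted methods are named backPropagation(Instance) and updateParameters() as in the notes above.

public void train(Dataset paraDataset, int paraRounds) {
	for (int t = 0; t < paraRounds; t++) {
		// Process the data batch by batch.
		for (int i = 0; i < paraDataset.size(); i += batchSize) {
			// Forward and back-propagate every instance of the current batch.
			// Only the errors/gradients are accumulated here.
			for (int j = i; j < i + batchSize && j < paraDataset.size(); j++) {
				Instance tempInstance = paraDataset.getInstance(j);
				forward(tempInstance);
				backPropagation(tempInstance);
			} // Of for j

			// Update the parameters once per batch (note 5): the accumulated
			// gradients are averaged via divideBatchSize, scaled by ALPHA via
			// multiplyAlpha, and the old weights are shrunk via multiplyLambda.
			updateParameters();
		} // Of for i
	} // Of for t
}// Of train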