使用C和C++实现简单神经网络

本文将分别使用C和C++实现一个简单的前馈神经网络,包括网络结构设计、前向传播和反向传播算法。我们将从基础构建,逐步实现一个可用于分类任务的神经网络。

一、C语言实现神经网络

1. 数据结构定义

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

// One fully connected layer of the network.
// All pointer members are heap buffers allocated by create_layer and released
// by free_neural_network.
typedef struct {
    int input_size;      // number of values fed into this layer
    int output_size;     // number of neurons (outputs) in this layer
    double** weights;    // weight matrix, indexed as weights[output][input]
    double* biases;      // one bias per output neuron
    double* output;      // activations written by the latest forward pass
    double* delta;       // per-neuron error term written during backpropagation
} Layer;

// A feed-forward network stored as an array of layers.
typedef struct {
    int num_layers;      // number of entries in `layers`; the input layer is not counted
    Layer* layers;       // heap array of layers, ordered from first hidden layer to output layer
} NeuralNetwork;

// Sigmoid激活函数
double sigmoid(double x) {
    return 1.0 / (1.0 + exp(-x));
}

// Derivative of the logistic sigmoid with respect to its input x,
// evaluated as sigmoid(x) * (1 - sigmoid(x)).
double sigmoid_derivative(double x) {
    const double activation = sigmoid(x);
    const double complement = 1 - activation;
    return activation * complement;
}

2. 初始化网络

// Allocates a zero-filled array of `count` elements of `size` bytes each.
// Terminates the program with a diagnostic when the allocation fails, so that
// create_layer never returns a layer containing NULL buffers.
static void* layer_calloc_or_die(size_t count, size_t size) {
    void* block = calloc(count, size);
    // calloc may legitimately return NULL for a zero-sized request.
    if (block == NULL && count != 0 && size != 0) {
        fprintf(stderr, "create_layer: out of memory\n");
        exit(EXIT_FAILURE);
    }
    return block;
}

// Creates one fully connected layer.
//
// Parameters:
//   input_size  - number of inputs feeding the layer (must be >= 0)
//   output_size - number of neurons in the layer (must be >= 0)
//
// Returns a Layer whose weights are drawn uniformly from [-1, 1] using rand()
// and whose biases, outputs and deltas are all zero. Every buffer is heap
// allocated; the caller owns them.
//
// The function does not return on invalid sizes or allocation failure: it
// prints a message to stderr and exits with EXIT_FAILURE.
Layer create_layer(int input_size, int output_size) {
    if (input_size < 0 || output_size < 0) {
        fprintf(stderr, "create_layer: layer sizes must be non-negative\n");
        exit(EXIT_FAILURE);
    }

    Layer layer;
    layer.input_size = input_size;
    layer.output_size = output_size;

    // One row of weights per output neuron.
    layer.weights = (double**)layer_calloc_or_die((size_t)output_size, sizeof(double*));
    for (int i = 0; i < output_size; i++) {
        layer.weights[i] = (double*)layer_calloc_or_die((size_t)input_size, sizeof(double));
        for (int j = 0; j < input_size; j++) {
            // Map rand() from [0, RAND_MAX] onto [-1, 1].
            layer.weights[i][j] = ((double)rand() / RAND_MAX) * 2.0 - 1.0;
        }
    }

    // calloc zero-fills, so biases start at 0 and output/delta never hold
    // indeterminate values before the first forward/backward pass.
    layer.biases = (double*)layer_calloc_or_die((size_t)output_size, sizeof(double));
    layer.output = (double*)layer_calloc_or_die((size_t)output_size, sizeof(double));
    layer.delta = (double*)layer_calloc_or_die((size_t)output_size, sizeof(double));

    return layer;
}

// 创建一个新的神经网络
NeuralNetwork create_neural_network(int num_layers, int* layer_sizes) {
    NeuralNetwork network;
    network.num_layers = num_layers - 1; // 不包括输入层
    
    // 分配层数组内存
    network.layers = (Layer*)malloc(network.num_layers * sizeof(Layer));
    
    // 创建每一层
    for (int i = 0; i < network.num_layers; i++) {
        network.layers[i] = create_layer(layer_sizes[i], layer_sizes[i+1]);
    }
    
    return network;
}

3. 前向传播实现

// Runs one forward pass and stores every layer's activations in its
// `output` buffer.
//
// Parameters:
//   network - the network to evaluate; passed by value, but the layer
//             buffers it points to are shared and are written here
//   input   - input vector; must hold at least layers[0].input_size values
//
// Each neuron computes sigmoid(bias + sum_j weight[j] * in[j]), where `in` is
// the network input for the first layer and the previous layer's output for
// every later layer. A network with no layers is left untouched.
void forward_propagate(NeuralNetwork network, double* input) {
    // Vector feeding the layer currently being evaluated.
    const double* layer_input = input;

    for (int l = 0; l < network.num_layers; l++) {
        const Layer* layer = &network.layers[l];

        for (int i = 0; i < layer->output_size; i++) {
            const double* row = layer->weights[i];
            double sum = layer->biases[i];
            for (int j = 0; j < layer->input_size; j++) {
                sum += row[j] * layer_input[j];
            }
            layer->output[i] = sigmoid(sum);
        }

        // This layer's activations become the next layer's input.
        layer_input = layer->output;
    }
}

4. 反向传播实现

// Performs one backpropagation step (plain gradient descent on squared
// error) using the activations left behind by the latest forward pass.
//
// Parameters:
//   network       - network to update; its layer buffers are modified
//   input         - the input vector that was used for the forward pass
//   target        - expected output, one value per output neuron
//   learning_rate - step size applied to every weight and bias
//
// forward_propagate must have been called with the same `input` beforehand,
// because the deltas are derived from the stored `output` values.
// A network with no layers is left untouched.
void backward_propagate(NeuralNetwork network, double* input, double* target, double learning_rate) {
    if (network.num_layers <= 0) {
        return;
    }

    const int last = network.num_layers - 1;
    const Layer* out_layer = &network.layers[last];

    // Output layer: delta = (y - t) * y * (1 - y), i.e. dE/dy * sigmoid'.
    for (int i = 0; i < out_layer->output_size; i++) {
        const double y = out_layer->output[i];
        out_layer->delta[i] = (y - target[i]) * y * (1 - y);
    }

    // Hidden layers, from back to front: propagate the next layer's deltas
    // through its weights. All deltas are computed before any weight changes.
    for (int l = last - 1; l >= 0; l--) {
        const Layer* curr = &network.layers[l];
        const Layer* next = &network.layers[l + 1];

        for (int i = 0; i < curr->output_size; i++) {
            double sum = 0.0;
            for (int j = 0; j < next->output_size; j++) {
                sum += next->delta[j] * next->weights[j][i];
            }
            const double y = curr->output[i];
            curr->delta[i] = sum * y * (1 - y);
        }
    }

    // Gradient step. The first layer is fed by `input`, every later layer by
    // the previous layer's stored output.
    const double* layer_input = input;
    for (int l = 0; l < network.num_layers; l++) {
        const Layer* layer = &network.layers[l];

        for (int i = 0; i < layer->output_size; i++) {
            const double step = learning_rate * layer->delta[i];
            for (int j = 0; j < layer->input_size; j++) {
                layer->weights[i][j] -= step * layer_input[j];
            }
            layer->biases[i] -= step;
        }

        layer_input = layer->output;
    }
}

5. 训练和预测函数

// Trains the network with per-sample gradient descent.
//
// For every epoch each sample is run through forward_propagate, its squared
// error is accumulated, and backward_propagate updates the weights. The
// accumulated error divided by (2 * num_samples) is printed every 100 epochs.
//
// Parameters:
//   network       - network to train (its layer buffers are modified)
//   inputs        - num_samples input vectors
//   targets       - num_samples expected output vectors
//   num_samples   - number of training samples
//   epochs        - number of passes over the whole data set
//   learning_rate - step size forwarded to backward_propagate
void train(NeuralNetwork network, double** inputs, double** targets, int num_samples, int epochs, double learning_rate) {
    for (int epoch = 0; epoch < epochs; epoch++) {
        double squared_error_sum = 0.0;

        for (int sample = 0; sample < num_samples; sample++) {
            double* features = inputs[sample];
            double* expected = targets[sample];

            forward_propagate(network, features);

            // Accumulate this sample's squared error before the weights move.
            const Layer* result_layer = &network.layers[network.num_layers - 1];
            for (int k = 0; k < result_layer->output_size; k++) {
                const double diff = expected[k] - result_layer->output[k];
                squared_error_sum += diff * diff;
            }

            backward_propagate(network, features, expected, learning_rate);
        }

        const double epoch_error = squared_error_sum / (2 * num_samples);

        if (epoch % 100 != 0) {
            continue;
        }
        printf("Epoch %d, Error: %f\n", epoch, epoch_error);
    }
}

// Runs a forward pass and returns a copy of the output layer's activations.
//
// Parameters:
//   network - trained network (its layer `output` buffers are overwritten)
//   input   - input vector for the first layer
//
// Returns a newly malloc'ed array with one value per output neuron; the
// caller must free() it. Returns NULL when the network has no layers or the
// allocation fails.
double* predict(NeuralNetwork network, double* input) {
    if (network.num_layers <= 0) {
        return NULL;
    }

    forward_propagate(network, input);

    const Layer* out_layer = &network.layers[network.num_layers - 1];
    double* prediction = (double*)malloc((size_t)out_layer->output_size * sizeof(double));
    if (prediction == NULL) {
        return NULL;
    }

    // Copy so the result stays valid after later forward passes.
    for (int i = 0; i < out_layer->output_size; i++) {
        prediction[i] = out_layer->output[i];
    }

    return prediction;
}

6. 内存释放函数

// Releases every heap buffer owned by the network: each layer's weight rows,
// weight row table, biases, outputs and deltas, and finally the layer array.
// The network must not be used after this call.
void free_neural_network(NeuralNetwork network) {
    for (int idx = 0; idx < network.num_layers; idx++) {
        Layer* current = &network.layers[idx];

        // The rows must go before the table that points at them.
        int row = 0;
        while (row < current->output_size) {
            free(current->weights[row]);
            row++;
        }

        free(current->weights);
        free(current->biases);
        free(current->output);
        free(current->delta);
    }

    free(network.layers);
}

7. 完整示例 - XOR问题

// Demo: trains a 2-4-1 network on the XOR truth table and prints the
// network's prediction for each of the four input combinations.
int main() {
    // Seed rand() so each run starts from different initial weights.
    srand(time(NULL));

    // XOR truth table: one row per sample.
    double inputs[4][2] = { {0, 0}, {0, 1}, {1, 0}, {1, 1} };
    double targets[4][1] = { {0}, {1}, {1}, {0} };

    // train() expects arrays of row pointers rather than 2-D arrays.
    double* input_ptrs[4];
    double* target_ptrs[4];
    for (int row = 0; row < 4; row++) {
        input_ptrs[row] = inputs[row];
        target_ptrs[row] = targets[row];
    }

    // Topology: 2 inputs -> 4 hidden neurons -> 1 output.
    int layer_sizes[3] = {2, 4, 1};
    NeuralNetwork network = create_neural_network(3, layer_sizes);

    printf("Training neural network for XOR problem...\n");
    train(network, input_ptrs, target_ptrs, 4, 10000, 0.1);

    printf("\nTesting network:\n");
    for (int row = 0; row < 4; row++) {
        double* prediction = predict(network, inputs[row]);
        printf("Input: [%.0f, %.0f], Prediction: %.6f, Expected: %.0f\n", 
               inputs[row][0], inputs[row][1], prediction[0], targets[row][0]);
        free(prediction);  // predict() hands ownership of the buffer to us
    }

    free_neural_network(network);

    return 0;
}

二、C++实现神经网络

1. 类结构定义

#include <iostream>
#include <vector>
#include <cmath>
#include <random>
#include <algorithm>

// One fully connected, sigmoid-activated layer of the feed-forward network.
// Holds its own parameters plus the activations and error terms produced by
// the most recent forward/backward pass.
class Layer {
private:
    int inputSize;                             // number of inputs per neuron
    int outputSize;                            // number of neurons in this layer
    std::vector<std::vector<double>> weights;  // indexed as weights[neuron][input]
    std::vector<double> biases;                // one bias per neuron
    std::vector<double> outputs;               // activations from the latest forward()
    std::vector<double> deltas;                // error terms from the latest backward pass
    
public:
    // Creates a layer with random weights and zero biases.
    Layer(int inputSize, int outputSize);
    
    // Computes and stores the layer's activations for `inputs`; returns a copy.
    std::vector<double> forward(const std::vector<double>& inputs);
    // Computes this layer's deltas from the following layer (skipped when
    // nextDeltas is empty) and applies a gradient step to weights and biases.
    void backward(const std::vector<double>& prevOutputs, const std::vector<double>& nextDeltas, 
                  const std::vector<std::vector<double>>& nextWeights, double learningRate);
    // Computes the output-layer deltas from `targets`; does not change weights.
    void updateLastLayer(const std::vector<double>& targets, double learningRate);
    
    // Read-only accessors
    const std::vector<double>& getOutputs() const { return outputs; }
    const std::vector<double>& getDeltas() const { return deltas; }
    const std::vector<std::vector<double>>& getWeights() const { return weights; }
    int getOutputSize() const { return outputSize; }
};

// Feed-forward network made of consecutive Layer objects.
class NeuralNetwork {
private:
    std::vector<Layer> layers;  // ordered from first hidden layer to output layer
    
public:
    // Builds one Layer per adjacent pair of entries in layerSizes.
    NeuralNetwork(const std::vector<int>& layerSizes);
    
    // Runs `inputs` through every layer and returns the last layer's output.
    std::vector<double> forward(const std::vector<double>& inputs);
    // Performs one gradient step towards `targets` for the given sample.
    void backward(const std::vector<double>& inputs, const std::vector<double>& targets, double learningRate);
    // Repeats forward + backward over all samples for `epochs` passes.
    void train(const std::vector<std::vector<double>>& inputs, 
               const std::vector<std::vector<double>>& targets, 
               int epochs, double learningRate);
    // Returns the network output for `inputs` (same as forward).
    std::vector<double> predict(const std::vector<double>& inputs);
};

2. Layer类实现

// Logistic sigmoid activation: 1 / (1 + e^(-x)).
double sigmoid(double x) {
    const double decay = std::exp(-x);
    return 1.0 / (1.0 + decay);
}

// Derivative of the logistic sigmoid with respect to its input x,
// evaluated as sigmoid(x) * (1 - sigmoid(x)).
double sigmoidDerivative(double x) {
    const double activation = sigmoid(x);
    const double complement = 1.0 - activation;
    return activation * complement;
}

// Builds a layer of `outputSize` neurons with `inputSize` inputs each.
// Weights are drawn uniformly from [-1, 1) with a Mersenne Twister seeded
// from std::random_device; biases, outputs and deltas start at zero.
Layer::Layer(int inputSize, int outputSize)
    : inputSize(inputSize),
      outputSize(outputSize),
      weights(outputSize, std::vector<double>(inputSize)),
      biases(outputSize, 0.0),
      outputs(outputSize, 0.0),
      deltas(outputSize, 0.0) {
    std::random_device seedSource;
    std::mt19937 engine(seedSource());
    std::uniform_real_distribution<double> uniform(-1.0, 1.0);

    // Fill row by row so the draw order is neuron-major.
    for (std::vector<double>& row : weights) {
        for (double& weight : row) {
            weight = uniform(engine);
        }
    }
}

// Computes sigmoid(bias + weights . inputs) for every neuron, stores the
// results in `outputs`, and returns a copy of them.
// `inputs` must contain at least inputSize values.
std::vector<double> Layer::forward(const std::vector<double>& inputs) {
    for (int neuron = 0; neuron < outputSize; ++neuron) {
        const std::vector<double>& row = weights[neuron];
        double preActivation = biases[neuron];
        for (int k = 0; k < inputSize; ++k) {
            preActivation += row[k] * inputs[k];
        }
        outputs[neuron] = sigmoid(preActivation);
    }
    return outputs;
}

// 反向传播(输出层)
void Layer::updateLastLayer(const std::vector<double>& targets, double learningRate) {
    // 计算输出层的delta
    for (int i = 0; i < outputSize; ++i) {
        double output = outputs[i];
        deltas[i] = (output - targets[i]) * output * (1 - output);
    }
}

// 反向传播(隐藏层)
void Layer::backward(const std::vector<double>& prevOutputs, const std::vector<double>& nextDeltas, 
                    const std::vector<std::vector<double>>& nextWeights, double learningRate) {
    
    // 计算当前层的delta
    if (!nextDeltas.empty()) {  // 非输出层
        for (int i = 0; i < outputSize; ++i) {
            double sum = 0.0;
            for (int j = 0; j < nextDeltas.size(); ++j) {
                sum += nextDeltas[j] * nextWeights[j][i];
            }
            double output = outputs[i];
            deltas[i] = sum * output * (1 - output);
        }
    }
    
    // 更新权重和偏置
    for (int i = 0; i < outputSize; ++i) {
        for (int j = 0; j < inputSize; ++j) {
            weights[i][j] -= learningRate * deltas[i] * prevOutputs[j];
        }
        biases[i] -= learningRate * deltas[i];
    }
}

3. NeuralNetwork类实现

// Creates one Layer for each adjacent pair of sizes: layer k maps
// layerSizes[k] inputs to layerSizes[k + 1] outputs. Fewer than two sizes
// yields a network without layers.
NeuralNetwork::NeuralNetwork(const std::vector<int>& layerSizes) {
    for (size_t next = 1; next < layerSizes.size(); ++next) {
        const int fanIn = layerSizes[next - 1];
        const int fanOut = layerSizes[next];
        layers.emplace_back(fanIn, fanOut);
    }
}

// 前向传播
std::vector<double> NeuralNetwork::forward(const std::vector<double>& inputs) {
    std::vector<double> currentInputs = inputs;
    
    for (Layer& layer : layers) {
        currentInputs = layer.forward(currentInputs);
    }
    
    return currentInputs;  // 返回最后一层的输出
}

// 反向传播
void NeuralNetwork::backward(const std::vector<double>& inputs, 
                            const std::vector<double>& targets, 
                            double learningRate) {
    
    // 获取输出层
    Layer& outputLayer = layers.back();
    outputLayer.updateLastLayer(targets, learningRate);
    
    std::vector<double> currentInputs = inputs;
    std::vector<std::vector<double>> layerInputs;
    layerInputs.push_back(currentInputs);
    
    // 保存每一层的输入(即前一层的输出)
    for (size_t i = 0; i < layers.size() - 1; ++i) {
        currentInputs = layers[i].forward(currentInputs);
        layerInputs.push_back(currentInputs);
    }
    
    // 从后向前更新每一层
    for (int i = static_cast<int>(layers.size()) - 1; i >= 0; --i) {
        if (i == layers.size() - 1) {
            // 输出层特殊处理
            layers[i].backward(layerInputs[i], std::vector<double>(), std::vector<std::vector<double>>(), learningRate);
        } else {
            // 隐藏层使用下一层的delta和权重
            layers[i].backward(layerInputs[i], layers[i+1].getDeltas(), layers[i+1].getWeights(), learningRate);
        }
    }
}

// Trains the network with per-sample gradient descent.
//
// For each epoch every sample is run through forward(), its squared error is
// accumulated, and backward() updates the weights. The accumulated error
// divided by (2 * number of samples) is printed every 100 epochs.
//
// Parameters:
//   inputs       - training samples
//   targets      - expected outputs; targets[s] belongs to inputs[s]
//   epochs       - number of passes over the data set
//   learningRate - step size forwarded to backward()
void NeuralNetwork::train(const std::vector<std::vector<double>>& inputs, 
                         const std::vector<std::vector<double>>& targets, 
                         int epochs, double learningRate) {
    const size_t sampleCount = inputs.size();

    for (int epoch = 0; epoch < epochs; ++epoch) {
        double squaredErrorSum = 0.0;

        for (size_t s = 0; s < sampleCount; ++s) {
            const std::vector<double> predicted = forward(inputs[s]);

            // Measure the error before this sample moves the weights.
            for (size_t k = 0; k < predicted.size(); ++k) {
                const double diff = targets[s][k] - predicted[k];
                squaredErrorSum += diff * diff;
            }

            backward(inputs[s], targets[s], learningRate);
        }

        squaredErrorSum /= (2 * sampleCount);

        if (epoch % 100 != 0) {
            continue;
        }
        std::cout << "Epoch " << epoch << ", Error: " << squaredErrorSum << std::endl;
    }
}

// Returns the network's output for `inputs`; a thin wrapper around forward().
std::vector<double> NeuralNetwork::predict(const std::vector<double>& inputs) {
    std::vector<double> result = forward(inputs);
    return result;
}

4. 完整示例 - XOR问题

// Demo: trains a 2-4-1 network on the XOR truth table and prints the
// network's prediction for each of the four input combinations.
int main() {
    // XOR truth table: inputs[k] is paired with targets[k].
    std::vector<std::vector<double>> inputs = {{0, 0}, {0, 1}, {1, 0}, {1, 1}};
    std::vector<std::vector<double>> targets = {{0}, {1}, {1}, {0}};

    // Topology: 2 inputs -> 4 hidden neurons -> 1 output.
    std::vector<int> layerSizes = {2, 4, 1};
    NeuralNetwork network(layerSizes);

    std::cout << "Training neural network for XOR problem..." << std::endl;
    network.train(inputs, targets, 10000, 0.1);

    std::cout << "\nTesting network:" << std::endl;
    for (size_t k = 0; k < inputs.size(); ++k) {
        const std::vector<double>& sample = inputs[k];
        std::vector<double> prediction = network.predict(sample);
        std::cout << "Input: [" << sample[0] << ", " << sample[1] 
                  << "], Prediction: " << prediction[0] 
                  << ", Expected: " << targets[k][0] << std::endl;
    }

    return 0;
}

三、进阶:实现更复杂的CNN示例(C++)

让我们实现一个简单的卷积神经网络,用于处理图像分类任务。这个例子将展示如何用C++实现卷积层、池化层和全连接层。注意:本示例只实现了前向传播(推理),不包含反向传播和训练过程,因此网络输出基于随机初始化的权重。

1. 基本数据结构

#include <algorithm>
#include <cmath>
#include <iostream>
#include <limits>
#include <random>
#include <stdexcept>
#include <vector>

// Dense 3-D tensor used for images and feature maps.
// Elements are addressed as data[channel][row][column].
class Tensor3D {
public:
    int depth, height, width;
    std::vector<std::vector<std::vector<double>>> data;

    // Creates a depth x height x width tensor with every element set to
    // initialValue (0.0 by default).
    Tensor3D(int depth, int height, int width, double initialValue = 0.0)
        : depth(depth), height(height), width(width),
          data(depth, std::vector<std::vector<double>>(
                          height, std::vector<double>(width, initialValue))) {}

    // Debug helper: writes every channel to std::cout, one row per line,
    // each value followed by a space, with a blank line after each channel.
    void print() const {
        for (int channel = 0; channel < depth; ++channel) {
            std::cout << "Channel " << channel << ":\n";
            for (int row = 0; row < height; ++row) {
                const std::vector<double>& cells = data[channel][row];
                for (int col = 0; col < width; ++col) {
                    std::cout << cells[col] << " ";
                }
                std::cout << "\n";
            }
            std::cout << "\n";
        }
    }
};

// Parameters of a convolution: a bank of square filters plus one bias per
// output channel. Weights are addressed as
// weights[outChannel][inChannel][kernelRow][kernelCol].
class ConvolutionKernel {
public:
    int inputChannels, outputChannels, size;
    std::vector<std::vector<std::vector<std::vector<double>>>> weights;
    std::vector<double> biases;

    // Allocates outputChannels x inputChannels filters of size x size,
    // fills them with values drawn uniformly from [-0.1, 0.1), and sets
    // every bias to zero.
    ConvolutionKernel(int inputChannels, int outputChannels, int size)
        : inputChannels(inputChannels), outputChannels(outputChannels), size(size),
          weights(outputChannels,
                  std::vector<std::vector<std::vector<double>>>(
                      inputChannels,
                      std::vector<std::vector<double>>(size, std::vector<double>(size, 0.0)))),
          biases(outputChannels, 0.0) {
        std::random_device seedSource;
        std::mt19937 engine(seedSource());
        std::uniform_real_distribution<double> uniform(-0.1, 0.1);

        // Walk the filter bank in storage order: out channel, in channel,
        // kernel row, kernel column.
        for (auto& filter : weights) {
            for (auto& plane : filter) {
                for (auto& row : plane) {
                    for (double& weight : row) {
                        weight = uniform(engine);
                    }
                }
            }
        }
    }
};

2. CNN层实现

// 2-D convolution layer with zero padding, followed by ReLU.
class ConvolutionalLayer {
private:
    int inputChannels, outputChannels, kernelSize, stride, padding;
    ConvolutionKernel kernel;

public:
    // Parameters:
    //   inputChannels  - channels expected in the input tensor
    //   outputChannels - number of filters, i.e. channels produced
    //   kernelSize     - side length of each square filter
    //   stride         - step between neighbouring windows (default 1)
    //   padding        - zero padding added on every border (default 0)
    ConvolutionalLayer(int inputChannels, int outputChannels, int kernelSize, 
                       int stride = 1, int padding = 0)
        : inputChannels(inputChannels), outputChannels(outputChannels), 
          kernelSize(kernelSize), stride(stride), padding(padding),
          kernel(inputChannels, outputChannels, kernelSize) {}

    // Convolves `input` with every filter and applies ReLU.
    // The output has outputChannels channels and spatial size
    // (dim + 2 * padding - kernelSize) / stride + 1 in each dimension.
    // Positions that fall into the padding contribute nothing to the sum.
    Tensor3D forward(const Tensor3D& input) {
        const int outRows = (input.height + 2 * padding - kernelSize) / stride + 1;
        const int outCols = (input.width + 2 * padding - kernelSize) / stride + 1;

        Tensor3D result(outputChannels, outRows, outCols);

        for (int oc = 0; oc < outputChannels; ++oc) {
            for (int r = 0; r < outRows; ++r) {
                // Top-left corner of the window in input coordinates; it is
                // negative while the window overlaps the padding.
                const int top = r * stride - padding;

                for (int c = 0; c < outCols; ++c) {
                    const int left = c * stride - padding;
                    double acc = kernel.biases[oc];

                    for (int ic = 0; ic < inputChannels; ++ic) {
                        for (int kh = 0; kh < kernelSize; ++kh) {
                            const int ih = top + kh;
                            if (ih < 0 || ih >= input.height) {
                                continue;  // whole kernel row lies in the padding
                            }
                            for (int kw = 0; kw < kernelSize; ++kw) {
                                const int iw = left + kw;
                                if (iw < 0 || iw >= input.width) {
                                    continue;
                                }
                                acc += input.data[ic][ih][iw] * kernel.weights[oc][ic][kh][kw];
                            }
                        }
                    }

                    // ReLU
                    result.data[oc][r][c] = std::max(0.0, acc);
                }
            }
        }

        return result;
    }
};

// Max-pooling layer: replaces every poolSize x poolSize window of each
// channel with its largest value.
class MaxPoolingLayer {
private:
    int poolSize, stride;

public:
    // Parameters:
    //   poolSize - side length of the pooling window (must be positive)
    //   stride   - step between windows; 0 (the default) means "same as
    //              poolSize", i.e. non-overlapping windows
    //
    // Throws std::invalid_argument when poolSize is not positive or the
    // effective stride is not positive, because forward() divides by it.
    MaxPoolingLayer(int poolSize, int stride = 0)
        : poolSize(poolSize), stride(stride == 0 ? poolSize : stride) {
        if (this->poolSize <= 0) {
            throw std::invalid_argument("MaxPoolingLayer: poolSize must be positive");
        }
        if (this->stride <= 0) {
            throw std::invalid_argument("MaxPoolingLayer: stride must be positive");
        }
    }

    // Pools every channel of `input` independently.
    // The output keeps input.depth channels and has spatial size
    // (dim - poolSize) / stride + 1 in each dimension, clamped to zero when
    // that formula would go negative.
    Tensor3D forward(const Tensor3D& input) {
        // A negative size must never reach Tensor3D, where it would be
        // converted to a huge unsigned allocation request.
        const int outputHeight = std::max(0, (input.height - poolSize) / stride + 1);
        const int outputWidth = std::max(0, (input.width - poolSize) / stride + 1);

        Tensor3D output(input.depth, outputHeight, outputWidth);

        for (int d = 0; d < input.depth; ++d) {
            for (int oh = 0; oh < outputHeight; ++oh) {
                for (int ow = 0; ow < outputWidth; ++ow) {
                    // Most negative finite double, so any real value wins.
                    double maxVal = std::numeric_limits<double>::lowest();

                    for (int ph = 0; ph < poolSize; ++ph) {
                        const int ih = oh * stride + ph;
                        if (ih >= input.height) {
                            break;  // remaining window rows are outside too
                        }
                        for (int pw = 0; pw < poolSize; ++pw) {
                            const int iw = ow * stride + pw;
                            if (iw >= input.width) {
                                break;
                            }
                            maxVal = std::max(maxVal, input.data[d][ih][iw]);
                        }
                    }

                    output.data[d][oh][ow] = maxVal;
                }
            }
        }

        return output;
    }
};

// Fully connected layer with ReLU activation, plus a softmax helper for
// turning the final layer's output into probabilities.
class FullyConnectedLayer {
private:
    int inputSize, outputSize;
    std::vector<std::vector<double>> weights;  // indexed as weights[output][input]
    std::vector<double> biases;
    std::vector<double> outputs;               // result of the latest forward()
    
public:
    // Creates a layer mapping inputSize values to outputSize values.
    // Weights are drawn uniformly from [-0.1, 0.1); biases start at zero.
    FullyConnectedLayer(int inputSize, int outputSize) 
        : inputSize(inputSize), outputSize(outputSize) {
        
        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_real_distribution<double> dis(-0.1, 0.1);
        
        weights.resize(outputSize, std::vector<double>(inputSize));
        for (int i = 0; i < outputSize; ++i) {
            for (int j = 0; j < inputSize; ++j) {
                weights[i][j] = dis(gen);
            }
        }
        
        biases.resize(outputSize, 0.0);
        outputs.resize(outputSize, 0.0);
    }
    
    // Computes max(0, bias + weights . input) for every output neuron,
    // stores the result, and returns a copy of it.
    // `input` must contain at least inputSize values.
    // NOTE(review): ReLU is applied unconditionally, so a layer whose output
    // is passed to softmax() has its negative scores clipped to 0 first;
    // confirm that this is intended for the final layer.
    std::vector<double> forward(const std::vector<double>& input) {
        for (int i = 0; i < outputSize; ++i) {
            double sum = biases[i];
            for (int j = 0; j < inputSize; ++j) {
                sum += weights[i][j] * input[j];
            }
            
            outputs[i] = std::max(0.0, sum);
        }
        
        return outputs;
    }
    
    // Returns the softmax of `input`: exp(x_i - max) / sum_k exp(x_k - max).
    // Subtracting the maximum keeps exp() from overflowing without changing
    // the result. An empty input yields an empty result.
    std::vector<double> softmax(const std::vector<double>& input) {
        std::vector<double> output(input.size());
        if (input.empty()) {
            // max_element on an empty range returns end(), which must not
            // be dereferenced.
            return output;
        }

        const double maxVal = *std::max_element(input.begin(), input.end());
        double sumExp = 0.0;
        
        for (size_t i = 0; i < input.size(); ++i) {
            output[i] = std::exp(input[i] - maxVal);
            sumExp += output[i];
        }
        
        // sumExp >= 1 because the maximum element contributes exp(0).
        for (size_t i = 0; i < input.size(); ++i) {
            output[i] /= sumExp;
        }
        
        return output;
    }
};

3. 简单CNN网络实现

// 简单CNN网络
class SimpleCNN {
private:
    ConvolutionalLayer conv1;
    MaxPoolingLayer pool1;
    ConvolutionalLayer conv2;
    MaxPoolingLayer pool2;
    FullyConnectedLayer fc1;
    FullyConnectedLayer fc2;
    
public:
    SimpleCNN() 
        : conv1(1, 8, 3, 1, 1),     // 输入通道1, 输出通道8, 3x3卷积核
          pool1(2, 2),              // 2x2最大池化
          conv2(8, 16, 3, 1, 1),    // 输入通道8, 输出通道16, 3x3卷积核
          pool2(2, 2),              // 2x2最大池化
          fc1(16 * 7 * 7, 128),     // 全连接层: 16*7*7 -> 128
          fc2(128, 10)              // 全连接层: 128 -> 10 (假设10个分类)
    {}
    
    std::vector<double> forward(const Tensor3D& input) {
        // 前向传播
        Tensor3D out1 = conv1.forward(input);         // 28x28x1 -> 28x28x8
        Tensor3D out2 = pool1.forward(out1);          // 28x28x8 -> 14x14x8
        Tensor3D out3 = conv2.forward(out2);          // 14x14x8 -> 14x14x16
        Tensor3D out4 = pool2.forward(out3);          // 14x14x16 -> 7x7x16
        
        // 将特征图展平为一维向量
        std::vector<double> flattenedFeatures;
        for (int d = 0; d < out4.depth; ++d) {
            for (int h = 0; h < out4.height; ++h) {
                for (int w = 0; w < out4.width; ++w) {
                    flattenedFeatures.push_back(out4.data[d][h][w]);
                }
            }
        }
        
        // 全连接层
        std::vector<double> fcOut1 = fc1.forward(flattenedFeatures);
        std::vector<double> fcOut2 = fc2.forward(fcOut1);
        
        // 输出层使用softmax激活
        return fc2.softmax(fcOut2);
    }
};

4. 使用示例

// Demo: pushes one random 28x28 single-channel image through SimpleCNN and
// prints the resulting class probabilities and the most likely class.
int main() {
    // Stand-in for a real image (e.g. an MNIST digit).
    Tensor3D image(1, 28, 28);

    // A real program would load pixel data here; the demo uses uniform
    // noise in [0, 1).
    std::random_device seedSource;
    std::mt19937 engine(seedSource());
    std::uniform_real_distribution<double> pixelValue(0.0, 1.0);

    for (int row = 0; row < image.height; ++row) {
        for (int col = 0; col < image.width; ++col) {
            image.data[0][row][col] = pixelValue(engine);
        }
    }

    SimpleCNN cnn;
    std::vector<double> output = cnn.forward(image);

    std::cout << "Classification results:" << std::endl;
    for (size_t cls = 0; cls < output.size(); ++cls) {
        std::cout << "Class " << cls << ": " << output[cls] << std::endl;
    }

    // The predicted class is the position of the largest probability.
    auto best = std::max_element(output.begin(), output.end());
    int predictedClass = std::distance(output.begin(), best);

    std::cout << "Predicted class: " << predictedClass << " with confidence " 
              << *best << std::endl;

    return 0;
}

四、编译与运行

C语言版本

# 使用gcc编译
gcc neural_network.c -o neural_network -lm
# 运行程序
./neural_network

C++版本

# 使用g++编译
g++ neural_network.cpp -o neural_network -std=c++11
# 运行程序
./neural_network

# 编译CNN版本
g++ cnn.cpp -o cnn -std=c++11
# 运行CNN程序
./cnn

五、总结

本文详细介绍了如何使用C和C++实现简单的神经网络,包括基本的前馈神经网络和更复杂的卷积神经网络。这些实现虽然简单,但包含了神经网络的核心组件:

  1. 基本数据结构:包括层结构、权重矩阵和神经元输出
  2. 前向传播:计算网络从输入到输出的过程
  3. 激活函数:实现了sigmoid和ReLU等常用激活函数
  4. 反向传播:在前馈网络中实现了基于梯度下降的反向传播算法,用于网络训练(CNN示例仅包含前向传播)
  5. CNN组件:卷积层、池化层和全连接层的实现

这些实现可以作为深入理解神经网络内部工作原理的基础,也可以作为高性能神经网络库的起点。对于需要极致性能或资源有限的环境(如嵌入式系统),这些C/C++实现比使用Python等高级语言的深度学习框架更为合适。

在实际应用中,你可以进一步优化这些实现,添加更多功能,如批处理、更多类型的层和优化器,以及GPU加速等。

Logo

脑启社区是一个专注类脑智能领域的开发者社区。欢迎加入社区,共建类脑智能生态。社区为开发者提供了丰富的开源类脑工具软件、类脑算法模型及数据集、类脑知识库、类脑技术培训课程以及类脑应用案例等资源。

更多推荐