This is my first neural network. Previously it had a single hidden layer; I've now made the number of hidden layers configurable. Everything works, in the sense that I'm not getting NaNs or infs. I've deliberately avoided pointers because I'm still new to programming and want to keep things as simple as possible for now. So I'd like to know how much trouble this code is going to cause me (I will be adding exception handling), and whether anyone can spot any inefficiencies.
Notation: del == gradient vector, a(x) == pre-activation, h(x) == activation, W == weights, b == biases, -log f(x)_y == loss function value, e(y) == a one-hot vector (all 0s and a single 1), f(x) == the post-activation output vector (softmax over the 3 output neurons).
Compute the output gradient:
    del_{a(L+1)(x)} -log f(x)_y <= -(e(y) - f(x))
For k from L+1 down to 1:
    Compute the gradients of the hidden layer parameters (weight and bias gradients):
        del_{W(k)} -log f(x)_y <= (del_{a(k)(x)} -log f(x)_y) h(k-1)(x)^T
        del_{b(k)} -log f(x)_y <= del_{a(k)(x)} -log f(x)_y
    Compute the gradient of the hidden layer below:
        del_{h(k-1)(x)} -log f(x)_y <= W(k)^T (del_{a(k)(x)} -log f(x)_y)
        del_{a(k-1)(x)} -log f(x)_y <= (del_{h(k-1)(x)} -log f(x)_y) ⊙ (derivative of the activation at a(k-1)(x))   // elementwise product
Then, using empirical risk minimization:
    Delta = -del_{W} -log f(x)_y - lambda * (gradient of the regularizer)   // I used L1
    parameters = parameters + alpha * Delta
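To make the last two lines concrete, here is a minimal single-weight version of that update (a sketch only: sgd_step_l1 and its parameter names are illustrative, not taken from the code below):

// One SGD step with an L1 penalty on a single weight.
// grad is dLoss/dw; the sign of w is the subgradient of |w|.
double sgd_step_l1(double w, double grad, double alpha, double lambda)
{
    double sign = (w > 0.0) - (w < 0.0);  // +1, -1, or 0
    double delta = -grad - lambda * sign; // Delta = -dLoss/dw - lambda * d|w|/dw
    return w + alpha * delta;             // parameters = parameters + alpha * Delta
}

This is what the weight updates in backprop below do, one scalar at a time.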
If this isn't enough code I can post more; just let me know. This is my first time posting here and I was worried about giving people too much to read. Here is the code:
#include <cmath>
#include <fstream>
#include <iostream>
#include <random>
#include <sstream>
#include <string>
#include <vector>

// The class definitions and helpers shown further down (Net, regularizer,
// and so on) must precede this function in the actual source file;
// tanhderiv and printvals are defined elsewhere in my code (not shown).
void backprop(Net& net, std::vector<double>& target)
{
    // Note to reader: the weights for each neuron are stored in the
    // previous neuron as a vector, and the bias for each neuron is stored
    // in the neuron itself as a double. net.hiddenneurons is a
    // two-dimensional vector containing each layer of hidden neurons.
    double PD{};
    for (size_t opd = 0; opd < net.outputneurons.size(); opd++)
    {
        net.outputneurons[opd].preactvalpd = -(target[opd] - net.outputneurons[opd].actval);
        PD = net.outputneurons[opd].preactvalpd * -1;
        net.outputneurons[opd].bias = net.outputneurons[opd].bias + (net.alpha * PD);
        PD = 0;
    }
    // The layer index must be a signed type: with size_t, "hs > -1"
    // compares against SIZE_MAX and the loop body never runs at all.
    int lastlayer = static_cast<int>(net.hiddenneurons.size()) - 1;
    for (int hs = lastlayer; hs >= 0; hs--)
    {
        int layerup = hs + 1;
        for (size_t current = 0; current < net.hiddenneurons[hs].size(); current++)
        {
            // I have combined finding the partial derivative (PD) of the
            // weights with updating the weights; the same goes for biases.
            if (hs == lastlayer)
            {
                // The last hidden layer's weights feed the output neurons.
                for (size_t wpd = 0; wpd < net.hiddenneurons[hs][current].weights.size(); wpd++)
                {
                    PD = net.outputneurons[wpd].preactvalpd * net.hiddenneurons[hs][current].actval;
                    PD = PD * -1;
                    net.hiddenneurons[hs][current].weights[wpd] =
                        net.hiddenneurons[hs][current].weights[wpd]
                        + (net.alpha * (PD - (net.lambda
                        * regularizer(net.hiddenneurons[hs][current].weights[wpd]))));
                    // Is this even a correct use of the regularizer?
                    PD = 0;
                }
                for (size_t op = 0; op < net.outputneurons.size(); op++)
                {
                    PD += net.hiddenneurons[hs][current].weights[op]
                        * net.outputneurons[op].preactvalpd;
                }
                net.hiddenneurons[hs][current].actvalPD = PD;
                PD = 0;
            }
            else
            {
                // Interior hidden layers feed the next hidden layer up.
                for (size_t wpd = 0; wpd < net.hiddenneurons[hs][current].weights.size(); wpd++)
                {
                    PD = net.hiddenneurons[layerup][wpd].preactvalpd
                        * net.hiddenneurons[hs][current].actval;
                    PD = PD * -1;
                    net.hiddenneurons[hs][current].weights[wpd] =
                        net.hiddenneurons[hs][current].weights[wpd]
                        + (net.alpha * (PD - (net.lambda
                        * regularizer(net.hiddenneurons[hs][current].weights[wpd]))));
                    PD = 0;
                }
                for (size_t op = 0; op < net.hiddenneurons[layerup].size(); op++)
                {
                    PD += net.hiddenneurons[hs][current].weights[op]
                        * net.hiddenneurons[layerup][op].preactvalpd;
                }
                net.hiddenneurons[hs][current].actvalPD = PD;
                PD = 0;
            }
            net.hiddenneurons[hs][current].preactvalpd =
                net.hiddenneurons[hs][current].actvalPD
                * tanhderiv(net.hiddenneurons[hs][current].preactval);
            // Step *against* the gradient, matching the output-bias update.
            net.hiddenneurons[hs][current].bias =
                net.hiddenneurons[hs][current].bias
                - (net.alpha * net.hiddenneurons[hs][current].preactvalpd);
        }
    }
    for (size_t iw = 0; iw < net.inneurons.size(); iw++)
    {
        for (size_t hpad = 0; hpad < net.inneurons[iw].weights.size(); hpad++)
        {
            PD = net.hiddenneurons[0][hpad].preactvalpd * net.inneurons[iw].val;
            PD = PD * -1;
            net.inneurons[iw].weights[hpad] =
                net.inneurons[iw].weights[hpad]
                + (net.alpha * (PD - (net.lambda
                * regularizer(net.inneurons[iw].weights[hpad]))));
            PD = 0;
        }
    }
    std::cout << "backprop done" << '\n';
}
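A hand-written backprop like this can be sanity-checked against a finite-difference estimate of each derivative. A minimal sketch (gradcheck is an illustrative helper, not part of the program; f is any scalar function of one parameter):

#include <cmath>
#include <functional>

// Compares an analytic derivative against the central difference
// (f(w+eps) - f(w-eps)) / (2*eps); a large relative error means the
// backprop formula for that parameter is wrong.
bool gradcheck(const std::function<double(double)>& f,
               double w, double analytic, double eps = 1e-5)
{
    double numeric = (f(w + eps) - f(w - eps)) / (2.0 * eps);
    double scale = std::abs(numeric) + std::abs(analytic) + 1e-12;
    return std::abs(numeric - analytic) / scale < 1e-4;
}

For example, f could rerun feedforward plus the loss with one weight perturbed.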
The rest of the code:

double randomt(double x, double y)
{
    // Construct and seed the engine only once; building a fresh
    // std::random_device and std::mt19937 on every call is slow.
    static std::mt19937 mt{ std::random_device{}() };
    std::uniform_real_distribution<double> dist(x, y);
    return dist(mt);
}
class InputN
{
public:
    double val{};
    std::vector<double> weights{};
};
class HiddenN
{
public:
    double preactval{};
    double actval{};
    double actvalPD{};
    double preactvalpd{};
    std::vector<double> weights{};
    double bias{};
};
class OutputN
{
public:
    double preactval{};
    double actval{};
    double preactvalpd{};
    double bias{};
};
class Net
{
public:
    std::vector<InputN> inneurons{};
    std::vector<std::vector<HiddenN>> hiddenneurons{};
    std::vector<OutputN> outputneurons{};
    double lambda{ 0.015 };
    double alpha{ 0.015 };
};
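To make the weight layout concrete: the weight from neuron j in layer k to neuron i in layer k+1 lives at hiddenneurons[k][j].weights[i]. A tiny hypothetical demonstration (sizes and values made up; classes as defined above):

// Hypothetical layout demo: weights fan *out* of a neuron, so the weight
// feeding layer-1 neuron 1 from layer-0 neuron 0 is stored in layer 0.
void layout_demo()
{
    Net net;
    net.hiddenneurons.assign(2, std::vector<HiddenN>(2));
    net.hiddenneurons[0][0].weights = { 0.5, -0.25 }; // fan-out to layer 1
    std::cout << "w(layer0 n0 -> layer1 n1) = "
              << net.hiddenneurons[0][0].weights[1] << '\n';
}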
void feedforward(Net& net)
{
    double sum{};
    for (size_t Hsize = 0; Hsize < net.hiddenneurons.size(); Hsize++)
    {
        int prevlayer = static_cast<int>(Hsize) - 1;
        for (size_t Hel = 0; Hel < net.hiddenneurons[Hsize].size(); Hel++)
        {
            if (Hsize == 0)
            {
                // The first hidden layer reads from the input neurons.
                for (size_t Isize = 0; Isize < net.inneurons.size(); Isize++)
                {
                    sum += net.inneurons[Isize].val * net.inneurons[Isize].weights[Hel];
                }
            }
            else
            {
                // Later hidden layers read from the previous hidden layer.
                for (size_t prs = 0; prs < net.hiddenneurons[prevlayer].size(); prs++)
                {
                    sum += net.hiddenneurons[prevlayer][prs].actval
                        * net.hiddenneurons[prevlayer][prs].weights[Hel];
                }
            }
            net.hiddenneurons[Hsize][Hel].preactval = net.hiddenneurons[Hsize][Hel].bias + sum;
            // Activate the full pre-activation; tanh(sum) alone drops the bias.
            net.hiddenneurons[Hsize][Hel].actval = tanh(net.hiddenneurons[Hsize][Hel].preactval);
            sum = 0;
        }
    }
    int lasthid = static_cast<int>(net.hiddenneurons.size()) - 1;
    for (size_t Osize = 0; Osize < net.outputneurons.size(); Osize++)
    {
        for (size_t Hsize = 0; Hsize < net.hiddenneurons[lasthid].size(); Hsize++)
        {
            sum += net.hiddenneurons[lasthid][Hsize].actval
                * net.hiddenneurons[lasthid][Hsize].weights[Osize];
        }
        net.outputneurons[Osize].preactval = net.outputneurons[Osize].bias + sum;
        sum = 0; // reset per output neuron, otherwise the sums accumulate
    }
}
void softmax(Net& net)
{
    double sum{};
    for (size_t Osize = 0; Osize < net.outputneurons.size(); Osize++)
    {
        sum += exp(net.outputneurons[Osize].preactval);
    }
    for (size_t Osize = 0; Osize < net.outputneurons.size(); Osize++)
    {
        net.outputneurons[Osize].actval = exp(net.outputneurons[Osize].preactval) / sum;
    }
}
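Since the question mentions avoiding NaNs and infs: exp() overflows to inf once a pre-activation exceeds roughly 709, so softmax is usually computed with the maximum pre-activation subtracted first; the shift cancels in the ratio, so the result is unchanged. A free-function sketch, not part of the original Net API:

#include <algorithm>
#include <cmath>
#include <vector>

// Numerically stable softmax over a vector of pre-activations.
std::vector<double> stable_softmax(const std::vector<double>& preact)
{
    double mx = *std::max_element(preact.begin(), preact.end());
    double sum = 0.0;
    std::vector<double> out(preact.size());
    for (size_t i = 0; i < preact.size(); i++)
    {
        out[i] = std::exp(preact[i] - mx);
        sum += out[i];
    }
    for (double& v : out) v /= sum;
    return out;
}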
double regularizer(double weight)
{
    // Subgradient of the L1 penalty |w|: the sign of the weight. Returning
    // +1 for negative weights would push them in the wrong direction.
    if (weight > 0) return 1;
    if (weight < 0) return -1;
    return 0;
}
double lossfunc(Net& net, const std::vector<double>& target)
{
    // Find the index of the hot entry of the one-hot target, then return
    // the cross-entropy loss for that class.
    size_t pos = 0;
    for (size_t t = 0; t < target.size(); t++)
    {
        if (target[t] > 0)
        {
            pos = t;
            break;
        }
    }
    return -log(net.outputneurons[pos].actval);
}
int main()
{
    std::vector<double> invals{};
    std::vector<double> target{};
    Net net;
    InputN Ineuron;
    HiddenN Hneuron;
    OutputN Oneuron;
    int IN = 4;
    int HIDLAYERS = 1;
    int HID = 8;
    int OUT = 3;
    for (int i = 0; i < IN; i++)
    {
        net.inneurons.push_back(Ineuron);
        for (int m = 0; m < HID; m++)
        {
            net.inneurons.back().weights.push_back(randomt(0, 1));
        }
    }
    for (int s = 0; s < HIDLAYERS; s++)
    {
        net.hiddenneurons.push_back(std::vector<HiddenN>());
        // The last hidden layer fans out to the OUT output neurons;
        // earlier layers fan out to the HID neurons of the next layer.
        int fanout = (s == HIDLAYERS - 1) ? OUT : HID;
        for (int i = 0; i < HID; i++)
        {
            net.hiddenneurons[s].push_back(Hneuron);
            for (int m = 0; m < fanout; m++)
            {
                net.hiddenneurons[s].back().weights.push_back(randomt(0, 1));
            }
            net.hiddenneurons[s].back().bias = randomt(0, 1);
        }
    }
    for (int i = 0; i < OUT; i++)
    {
        net.outputneurons.push_back(Oneuron);
        net.outputneurons.back().bias = randomt(0, 1);
    }
    int count{};
    std::ifstream fileread("N.txt");
    for (int epoch = 0; epoch < 500; epoch++)
    {
        count = 0;
        fileread.clear();
        fileread.seekg(0, std::ios::beg); // rewind the file for every epoch
        std::string fileline{};
        // Reading in the loop condition terminates cleanly at end of file;
        // looping on is_open() alone never ends.
        while (fileread >> fileline)
        {
            std::cout << '\n' << "epoch: " << epoch << '\n';
            if (fileline == "in:")
            {
                std::string input{};
                double nums{};
                std::getline(fileread, input);
                std::stringstream ss(input);
                while (ss >> nums)
                {
                    invals.push_back(nums);
                }
            }
            else if (fileline == "out:")
            {
                std::string output{};
                double num{};
                std::getline(fileread, output);
                std::stringstream ss(output);
                while (ss >> num)
                {
                    target.push_back(num);
                }
            }
            count += 1;
            if (count == 2)
            {
                // One "in:"/"out:" pair has been read: run a training step.
                for (size_t inv = 0; inv < invals.size(); inv++)
                {
                    net.inneurons[inv].val = invals[inv];
                }
                feedforward(net);
                softmax(net);
                printvals("output", net); // just prints weights and biases
                std::cout << "target: " << '\n';
                for (auto element : target) std::cout << element << " / ";
                std::cout << '\n';
                backprop(net, target);
                invals.clear();
                target.clear();
                count = 0;
            }
        }
    }
    return 0;
}

I know there are probably quite a few inefficiencies in the code, but I'm not asking anyone to point out every single problem; just the biggest ones. I'm still learning, and any criticism is very welcome. The input file looks like this, for example:

in: 0.45 0.62 0.78 0.94
out: 0.0 0.0 1.0

It comes from a function I wrote that just pushes 4 random numbers into a vector and sums them: if the sum is below 1 the output is 1.0 0.0 0.0, if it is between 1 and 2 it is 0.0 1.0 0.0, and otherwise it is 0.0 0.0 1.0.
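The generating function itself wasn't posted; here is a minimal sketch of what it does as described (writedata is an illustrative name, and the four values are assumed to be drawn uniformly from [0, 1) like the other random numbers in the program):

#include <fstream>
#include <random>

// Hypothetical reconstruction of the data generator: writes "in:" lines
// with 4 random values and "out:" lines with a one-hot label chosen by
// the sum of the inputs.
void writedata(const char* path, int samples)
{
    static std::mt19937 mt{ std::random_device{}() };
    std::uniform_real_distribution<double> dist(0.0, 1.0);
    std::ofstream out(path);
    for (int s = 0; s < samples; s++)
    {
        double sum = 0.0;
        out << "in:";
        for (int i = 0; i < 4; i++)
        {
            double v = dist(mt);
            sum += v;
            out << ' ' << v;
        }
        out << "\nout:";
        if (sum < 1.0)      out << " 1.0 0.0 0.0";
        else if (sum < 2.0) out << " 0.0 1.0 0.0";
        else                out << " 0.0 0.0 1.0";
        out << '\n';
    }
}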
Just a note for anyone wondering: this is a personal project. I'm not at university or school; I'm teaching myself :) so this isn't homework or anything.
Posted on 2021-01-14 20:03:23
Even though chances are you will be giving us the complete code before long, here are a few hints at first glance:
https://codereview.stackexchange.com/questions/254696