#include "Activation.h"

#include <cmath>  // exp()

namespace TNet {

void Sigmoid::PropagateFnc(const BfMatrix& X, BfMatrix& Y) {
  // Y = 1 / (1 + e^{-X}), applied element-wise
  for(size_t r=0; r<X.Rows(); r++) {
    for(size_t c=0; c<X.Cols(); c++) {
      Y(r,c) = 1.0f/(1.0f+exp(-X(r,c)));
    }
  }
}

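// Note for the backpropagation below: with y = sigmoid(x),
//   dy/dx = e^{-x} / (1 + e^{-x})^2 = y * (1 - y),
// so the incoming error is scaled element-wise by out * (1 - out).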
void Sigmoid::BackpropagateFnc(const BfMatrix& X, BfMatrix& Y) {
  const BfMatrix& out = GetOutput();
  // Y = OUT * (1 - OUT) * X  (derived: sigmoid derivative times incoming error)
  for(size_t r=0; r<X.Rows(); r++) {
    for(size_t c=0; c<X.Cols(); c++) {
      Y(r,c) = X(r,c)*out(r,c)*(1.0f-out(r,c));
    }
  }
}

void Softmax::PropagateFnc(const BfMatrix& X, BfMatrix& Y) {
  // Y_j = e^{X_j} / sum_i(e^{X_i})
  //
  // The result is invariant to shifting all inputs by a constant c
  // (we use c = -max, for numerical stability):
  //   e^{X_j+c} / sum_i(e^{X_i+c})
  //   = (e^c * e^{X_j}) / (e^c * sum_i(e^{X_i}))
  //   = e^{X_j} / sum_i(e^{X_i})
  size_t rows = X.Rows();
  for(size_t i=0; i<rows; i++) {
    BfSubVector y_i(Y[i]); // y_i is a view into the i'th row of matrix Y
    y_i.Copy(X[i]);
    BaseFloat max = y_i.Max();
    y_i.Subtract(max);
    y_i.ApplyExp();
    BaseFloat sum = y_i.Sum();
    y_i.Scale(1.0f/sum);
  }
}
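// Note on PropagateFnc above, showing why the shift by max matters:
// for a row X = (1000, 1001), exp(1000) overflows in single precision,
// whereas after subtracting max = 1001 we evaluate exp(-1) and exp(0)
// and obtain the same normalized result.
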
void Softmax::BackpropagateFnc(const BfMatrix& X, BfMatrix& Y) {
  // simply copy the error...
  Y.Copy(X);
}
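// Note on the plain copy above: it is valid under the usual pairing of
// softmax with a cross-entropy objective, where the gradient of that
// combination w.r.t. the pre-softmax activations is (y - t); the caller
// computes (y - t) and passes it in as X, so the softmax Jacobian is
// intentionally not applied here.
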
void BlockSoftmax::ReadFromStream(std::istream& rIn) {
  rIn >> mDim;
  mDimOffset.Init(mDim.Dim()+1);
  // precompute the prefix sums of the block dimensions
  int off=0;
  for(int i=0; i<mDim.Dim(); i++) {
    mDimOffset[i]=off;
    off += mDim[i];
  }
  mDimOffset[mDim.Dim()]=off;
  if(off!=GetNOutputs()) {
    KALDI_ERR << "Sum of the softmax block dimensions does not match the"
              << " layer output dimension, the sum:" << off
              << " GetNOutputs:" << GetNOutputs();
  }
}
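// Example for ReadFromStream above (hypothetical dimensions): reading
// mDim = {500, 1000, 1500} gives mDimOffset = {0, 500, 1500, 3000};
// block j occupies the output columns [mDimOffset[j], mDimOffset[j]+mDim[j]),
// and the final offset (3000) must equal GetNOutputs().
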
void BlockSoftmax::WriteToStream(std::ostream& rOut) {
  rOut << mDim;
}

void BlockSoftmax::PropagateFnc(const BfMatrix& X, BfMatrix& Y) {
  // Y_j = e^{X_j} / sum_i(e^{X_i}), evaluated independently within each
  // block, using the same shift-by-max trick as Softmax::PropagateFnc
  size_t rows = X.Rows();
  for(size_t i=0; i<rows; i++) {
    BfSubVector y_i(Y[i]); // y_i is a view into the i'th row of matrix Y
    y_i.Copy(X[i]);
    // normalize separately on each softmax interval...
    for(int j=0; j<mDim.Dim(); j++) {
      BfSubVector y_i_smx_j(y_i.Range(mDimOffset[j],mDim[j]));
      BaseFloat max = y_i_smx_j.Max();
      y_i_smx_j.Subtract(max);
      y_i_smx_j.ApplyExp();
      BaseFloat sum = y_i_smx_j.Sum();
      y_i_smx_j.Scale(1.0f/sum);
    }
  }
}
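// Example for PropagateFnc above (hypothetical values): with mDim = {2, 3},
// a row X = (1, 2, 0, 0, 1) propagates to approximately
// (0.27, 0.73, 0.21, 0.21, 0.58); each block sums to 1.0, so the whole
// row sums to mDim.Dim() = 2.0.
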
void BlockSoftmax::BackpropagateFnc(const BfMatrix& X, BfMatrix& Y) {
  // set the output to zero
  Y.Zero();
  // copy only the parts of the error that come from softmax intervals
  // whose error sums to ~0.0; intervals summing to ~1.0 are skipped
  for(size_t i=0; i<X.Rows(); i++) {
    for(int j=0; j<mDim.Dim(); j++) {
      const BfSubVector x_i_smx_j(X[i].Range(mDimOffset[j],mDim[j]));
      BaseFloat sum = x_i_smx_j.Sum();
      if(sum > -0.1 && sum < 0.1) {
        BfSubVector y_i_smx_j(Y[i].Range(mDimOffset[j],mDim[j]));
        y_i_smx_j.Copy(x_i_smx_j);
      } else if (sum > 0.9 && sum < 1.1) {
        ; // do nothing, this block carries no error for the current frame
      } else {
        KALDI_ERR << "Invalid sum: " << sum;
      }
    }
  }
}
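// Rationale for the sum test above (assuming the cross-entropy convention
// noted at Softmax::BackpropagateFnc): in the block holding the current
// target, the incoming error is (y - t), and both y and t sum to 1.0
// within the block, so the block error sums to ~0.0 and is propagated.
// In a block with no target the error is just y, which sums to ~1.0, so
// the block is treated as unsupervised and its error stays zeroed.
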
} //namespace TNet