Auto Byte

Science AI

# 无需反向传播的深度学习：DeepMind的合成梯度

本文介绍 DeepMind（Jaderberg 等人）提出的合成梯度（Decoupled Neural Interfaces）相关工作。

class Layer(object):
    """A fully connected layer exposing an explicit backprop interface.

    The weight matrix is initialised with scaled Gaussian noise roughly in
    [-0.1, 0.1), exactly as in the original tutorial code.
    """

    def __init__(self, input_dim, output_dim, nonlin, nonlin_deriv):
        # nonlin_deriv is expected to take the layer OUTPUT (post-nonlinearity),
        # e.g. sigmoid_out2deriv(out) = out * (1 - out).
        self.weights = (np.random.randn(input_dim, output_dim) * 0.2) - 0.1
        self.nonlin = nonlin
        self.nonlin_deriv = nonlin_deriv

    def forward(self, input):
        """Cache the input, apply weights then the nonlinearity, return the activation."""
        self.input = input
        self.output = self.nonlin(np.dot(self.input, self.weights))
        return self.output

    def backward(self, output_delta):
        """Scale the incoming delta by the local derivative and propagate it back."""
        self.weight_output_delta = output_delta * self.nonlin_deriv(self.output)
        return np.dot(self.weight_output_delta, self.weights.T)

    def update(self, alpha=0.1):
        """Gradient-descent step using the delta cached by backward()."""
        self.weights -= np.dot(self.input.T, self.weight_output_delta) * alpha

update 可能是最简单的方法：它直接利用 backward 中计算好的、位于权重输出处的导数（weight_output_delta）来更新权重。如果你对这些步骤仍有疑问，建议先回头读一遍《A Neural Network in 11 Lines of Python》再继续。如果你已经全部理解，那么让我们在训练中看看这个层对象的表现。

# Build a 3-layer sigmoid network and train it with plain backpropagation.
layer_1 = Layer(input_dim, layer_1_dim, sigmoid, sigmoid_out2deriv)
layer_2 = Layer(layer_1_dim, layer_2_dim, sigmoid, sigmoid_out2deriv)
layer_3 = Layer(layer_2_dim, output_dim, sigmoid, sigmoid_out2deriv)
layers = [layer_1, layer_2, layer_3]

for epoch in range(iterations):
    error = 0
    for batch_i in range(len(x) // batch_size):
        start = batch_i * batch_size
        stop = start + batch_size
        batch_x = x[start:stop]
        batch_y = y[start:stop]

        # Forward pass through every layer in order.
        activation = batch_x
        for layer in layers:
            activation = layer.forward(activation)

        # Backward pass: seed with the output error, then let each layer
        # scale the delta locally and hand it to the layer below.
        delta = activation - batch_y
        for layer in reversed(layers):
            delta = layer.backward(delta)

        # Apply the gradients each layer cached during backward().
        for layer in layers:
            layer.update()

class DNI(object):
    """A fully connected layer that updates itself with a synthetic gradient.

    Besides the ordinary weight matrix, the layer owns a small linear model
    (``weights_synthetic_grads``) that predicts the layer's own output
    gradient from its output, so the weight update can happen during the
    forward pass (DeepMind's "Decoupled Neural Interfaces").
    """

    def __init__(self, input_dim, output_dim, nonlin, nonlin_deriv, alpha=0.1):
        # Same weight initialisation as the plain Layer class.
        self.weights = (np.random.randn(input_dim, output_dim) * 0.2) - 0.1
        self.nonlin = nonlin
        self.nonlin_deriv = nonlin_deriv

        # New: linear synthetic-gradient generator (output -> predicted gradient).
        self.weights_synthetic_grads = (np.random.randn(output_dim, output_dim) * 0.2) - 0.1
        self.alpha = alpha

    # Used to be just "forward"; now the weight update also happens here,
    # driven by the synthetic gradient.
    def forward_and_synthetic_update(self, input):
        """Forward propagate AND update the weights via the synthetic gradient.

        Returns a tuple ``(backpropagated_synthetic_gradient, output)`` —
        the first element plays the role of Layer.backward's return value.
        """
        # Cache input, then forward propagate.
        self.input = input
        self.output = self.nonlin(self.input.dot(self.weights))

        # Predict this layer's output gradient with a simple linear transform.
        self.synthetic_gradient = self.output.dot(self.weights_synthetic_grads)

        # Gradient-descent step on the regular weights.
        # BUGFIX: was `+=`, which ascends the loss; `-=` matches the sign
        # convention of Layer.update (delta = output - target).
        self.weight_synthetic_gradient = self.synthetic_gradient * self.nonlin_deriv(self.output)
        self.weights -= self.input.T.dot(self.weight_synthetic_gradient) * self.alpha

        # NOTE(review): the gradient handed to the previous layer is computed
        # with the freshly *updated* weights, as in the original article —
        # kept as-is to preserve the published algorithm.
        return self.weight_synthetic_gradient.dot(self.weights.T), self.output

    # Just like Layer.update, except it trains the synthetic-gradient weights.
    def update_synthetic_weights(self, true_gradient):
        """Fit the synthetic-gradient generator to the true gradient.

        BUGFIX: descend (`-=`) the squared error between the synthetic and
        the true gradient; the original `+=` moved away from the target.
        """
        self.synthetic_gradient_delta = self.synthetic_gradient - true_gradient
        self.weights_synthetic_grads -= self.output.T.dot(self.synthetic_gradient_delta) * self.alpha

Magitec・CTO