
55. Deep Learning Self-Study Series, Building Your Own Deep Learning Framework, Part 16: Using an LSTM to Solve the RNN Vanishing and Exploding Gradient Problem and Generate Shakespeare-Style Text. The corresponding shakesper.txt training set is available for download in my resources.
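A quick intuition before the full listing: in a plain RNN the hidden-state gradient is multiplied by the same recurrent weight matrix at every time step, so over long sequences it shrinks or blows up geometrically. In the LSTM below, the cell state is updated additively, c = (f * prev_cell) + (i * g), so the gradient flowing from one cell state back to the previous one is just the elementwise forget gate f (ignoring the gates' own dependence on the previous hidden state), which the network can keep close to 1. The minimal NumPy sketch below is not part of the original program; the sizes and the 0.95 forget-gate value are illustrative assumptions, used only to compare the two cases.

import numpy as np

np.random.seed(0)
n_hidden, steps = 64, 100

# Plain RNN: h_t = tanh(W_hh h_{t-1} + ...). Backprop multiplies by W_hh^T
# (times tanh') at every step, so the gradient changes geometrically.
W_hh = np.random.randn(n_hidden, n_hidden) * 0.1
grad_rnn = np.ones(n_hidden)
for _ in range(steps):
    grad_rnn = W_hh.T.dot(grad_rnn)          # ignoring tanh' for brevity

# LSTM cell state: c_t = f_t * c_{t-1} + i_t * g_t. The Jacobian dc_t/dc_{t-1}
# is just diag(f_t), so with forget gates near 1 the gradient is preserved.
f = np.full(n_hidden, 0.95)                  # illustrative forget-gate values
grad_lstm = np.ones(n_hidden)
for _ in range(steps):
    grad_lstm = f * grad_lstm                # elementwise, no matrix product

print("RNN grad norm after 100 steps :", np.linalg.norm(grad_rnn))
print("LSTM grad norm after 100 steps:", np.linalg.norm(grad_lstm))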
import numpy as np


class Tensor(object):

    def __init__(self, data,
                 autograd=False,
                 creators=None,
                 creation_op=None,
                 id=None):
        self.data = np.array(data)
        self.autograd = autograd
        self.grad = None
        if (id is None):
            self.id = np.random.randint(0, 1000000000)
        else:
            self.id = id

        self.creators = creators
        self.creation_op = creation_op
        self.children = {}

        if (creators is not None):
            for c in creators:
                if (self.id not in c.children):
                    c.children[self.id] = 1
                else:
                    c.children[self.id] += 1

    def all_children_grads_accounted_for(self):
        for id, cnt in self.children.items():
            if (cnt != 0):
                return False
        return True

    def backward(self, grad=None, grad_origin=None):
        if (self.autograd):

            if (grad is None):
                grad = Tensor(np.ones_like(self.data))

            if (grad_origin is not None):
                if (self.children[grad_origin.id] == 0):
                    return
                    print(self.id)
                    print(self.creation_op)
                    print(len(self.creators))
                    for c in self.creators:
                        print(c.creation_op)
                    raise Exception("cannot backprop more than once")
                else:
                    self.children[grad_origin.id] -= 1

            if (self.grad is None):
                self.grad = grad
            else:
                self.grad += grad

            # grads must not have grads of their own
            assert grad.autograd == False

            # only continue backpropping if there's something to
            # backprop into and if all gradients (from children)
            # are accounted for; override waiting for children if
            # "backprop" was called on this variable directly
            if (self.creators is not None and
                    (self.all_children_grads_accounted_for() or
                     grad_origin is None)):

                if (self.creation_op == "add"):
                    self.creators[0].backward(self.grad, self)
                    self.creators[1].backward(self.grad, self)

                if (self.creation_op == "sub"):
                    self.creators[0].backward(Tensor(self.grad.data), self)
                    self.creators[1].backward(Tensor(self.grad.__neg__().data), self)

                if (self.creation_op == "mul"):
                    new = self.grad * self.creators[1]
                    self.creators[0].backward(new, self)
                    new = self.grad * self.creators[0]
                    self.creators[1].backward(new, self)

                if (self.creation_op == "mm"):
                    c0 = self.creators[0]
                    c1 = self.creators[1]
                    new = self.grad.mm(c1.transpose())
                    c0.backward(new)
                    new = self.grad.transpose().mm(c0).transpose()
                    c1.backward(new)

                if (self.creation_op == "transpose"):
                    self.creators[0].backward(self.grad.transpose())

                if ("sum" in self.creation_op):
                    dim = int(self.creation_op.split("_")[1])
                    self.creators[0].backward(self.grad.expand(dim, self.creators[0].data.shape[dim]))

                if ("expand" in self.creation_op):
                    dim = int(self.creation_op.split("_")[1])
                    self.creators[0].backward(self.grad.sum(dim))

                if (self.creation_op == "neg"):
                    self.creators[0].backward(self.grad.__neg__())

                if (self.creation_op == "sigmoid"):
                    ones = Tensor(np.ones_like(self.grad.data))
                    self.creators[0].backward(self.grad * (self * (ones - self)))

                if (self.creation_op == "tanh"):
                    ones = Tensor(np.ones_like(self.grad.data))
                    self.creators[0].backward(self.grad * (ones - (self * self)))

                if (self.creation_op == "index_select"):
                    new_grad = np.zeros_like(self.creators[0].data)
                    indices_ = self.index_select_indices.data.flatten()
                    grad_ = grad.data.reshape(len(indices_), -1)
                    for i in range(len(indices_)):
                        new_grad[indices_[i]] += grad_[i]
                    self.creators[0].backward(Tensor(new_grad))

                if (self.creation_op == "cross_entropy"):
                    dx = self.softmax_output - self.target_dist
                    self.creators[0].backward(Tensor(dx))

    def __add__(self, other):
        if (self.autograd and other.autograd):
            return Tensor(self.data + other.data,
                          autograd=True,
                          creators=[self, other],
                          creation_op="add")
        return Tensor(self.data + other.data)

    def __neg__(self):
        if (self.autograd):
            return Tensor(self.data * -1,
                          autograd=True,
                          creators=[self],
                          creation_op="neg")
        return Tensor(self.data * -1)

    def __sub__(self, other):
        if (self.autograd and other.autograd):
            return Tensor(self.data - other.data,
                          autograd=True,
                          creators=[self, other],
                          creation_op="sub")
        return Tensor(self.data - other.data)

    def __mul__(self, other):
        if (self.autograd and other.autograd):
            return Tensor(self.data * other.data,
                          autograd=True,
                          creators=[self, other],
                          creation_op="mul")
        return Tensor(self.data * other.data)

    def sum(self, dim):
        if (self.autograd):
            return Tensor(self.data.sum(dim),
                          autograd=True,
                          creators=[self],
                          creation_op="sum_" + str(dim))
        return Tensor(self.data.sum(dim))

    def expand(self, dim, copies):
        trans_cmd = list(range(0, len(self.data.shape)))
        trans_cmd.insert(dim, len(self.data.shape))
        new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)
        if (self.autograd):
            return Tensor(new_data,
                          autograd=True,
                          creators=[self],
                          creation_op="expand_" + str(dim))
        return Tensor(new_data)

    def transpose(self):
        if (self.autograd):
            return Tensor(self.data.transpose(),
                          autograd=True,
                          creators=[self],
                          creation_op="transpose")
        return Tensor(self.data.transpose())

    def mm(self, x):
        if (self.autograd):
            return Tensor(self.data.dot(x.data),
                          autograd=True,
                          creators=[self, x],
                          creation_op="mm")
        return Tensor(self.data.dot(x.data))

    def sigmoid(self):
        if (self.autograd):
            return Tensor(1 / (1 + np.exp(-self.data)),
                          autograd=True,
                          creators=[self],
                          creation_op="sigmoid")
        return Tensor(1 / (1 + np.exp(-self.data)))

    def tanh(self):
        if (self.autograd):
            return Tensor(np.tanh(self.data),
                          autograd=True,
                          creators=[self],
                          creation_op="tanh")
        return Tensor(np.tanh(self.data))

    def index_select(self, indices):
        if (self.autograd):
            new = Tensor(self.data[indices.data],
                         autograd=True,
                         creators=[self],
                         creation_op="index_select")
            new.index_select_indices = indices
            return new
        return Tensor(self.data[indices.data])

    def softmax(self):
        temp = np.exp(self.data)
        softmax_output = temp / np.sum(temp,
                                       axis=len(self.data.shape) - 1,
                                       keepdims=True)
        return softmax_output

    def cross_entropy(self, target_indices):
        temp = np.exp(self.data)
        softmax_output = temp / np.sum(temp,
                                       axis=len(self.data.shape) - 1,
                                       keepdims=True)
        t = target_indices.data.flatten()
        p = softmax_output.reshape(len(t), -1)
        target_dist = np.eye(p.shape[1])[t]
        loss = -(np.log(p) * (target_dist)).sum(1).mean()
        if (self.autograd):
            out = Tensor(loss,
                         autograd=True,
                         creators=[self],
                         creation_op="cross_entropy")
            out.softmax_output = softmax_output
            out.target_dist = target_dist
            return out
        return Tensor(loss)

    def __repr__(self):
        return str(self.data.__repr__())

    def __str__(self):
        return str(self.data.__str__())


class Layer(object):

    def __init__(self):
        self.parameters = list()

    def get_parameters(self):
        return self.parameters


class SGD(object):

    def __init__(self, parameters, alpha=0.1):
        self.parameters = parameters
        self.alpha = alpha

    def zero(self):
        for p in self.parameters:
            p.grad.data *= 0

    def step(self, zero=True):
        for p in self.parameters:
            p.data -= p.grad.data * self.alpha
            if (zero):
                p.grad.data *= 0


class Linear(Layer):

    def __init__(self, n_inputs, n_outputs, bias=True):
        super().__init__()
        self.use_bias = bias
        W = np.random.randn(n_inputs, n_outputs) * np.sqrt(2.0 / (n_inputs))
        self.weight = Tensor(W, autograd=True)
        if (self.use_bias):
            self.bias = Tensor(np.zeros(n_outputs), autograd=True)
        self.parameters.append(self.weight)
        if (self.use_bias):
            self.parameters.append(self.bias)

    def forward(self, input):
        if (self.use_bias):
            return input.mm(self.weight) + self.bias.expand(0, len(input.data))
        return input.mm(self.weight)
class Sequential(Layer):

    def __init__(self, layers=list()):
        super().__init__()
        self.layers = layers

    def add(self, layer):
        self.layers.append(layer)

    def forward(self, input):
        for layer in self.layers:
            input = layer.forward(input)
        return input

    def get_parameters(self):
        params = list()
        for l in self.layers:
            params += l.get_parameters()
        return params


class Embedding(Layer):

    def __init__(self, vocab_size, dim):
        super().__init__()
        self.vocab_size = vocab_size
        self.dim = dim
        # this random initialization style is just a convention from word2vec
        self.weight = Tensor((np.random.rand(vocab_size, dim) - 0.5) / dim, autograd=True)
        self.parameters.append(self.weight)

    def forward(self, input):
        return self.weight.index_select(input)


class Tanh(Layer):

    def __init__(self):
        super().__init__()

    def forward(self, input):
        return input.tanh()


class Sigmoid(Layer):

    def __init__(self):
        super().__init__()

    def forward(self, input):
        return input.sigmoid()


class CrossEntropyLoss(object):

    def __init__(self):
        super().__init__()

    def forward(self, input, target):
        return input.cross_entropy(target)


class RNNCell(Layer):

    def __init__(self, n_inputs, n_hidden, n_output, activation='sigmoid'):
        super().__init__()

        self.n_inputs = n_inputs
        self.n_hidden = n_hidden
        self.n_output = n_output

        if (activation == 'sigmoid'):
            self.activation = Sigmoid()
        elif (activation == 'tanh'):
            self.activation = Tanh()  # fixed: was a comparison (==), not an assignment
        else:
            raise Exception("Non-linearity not found")

        self.w_ih = Linear(n_inputs, n_hidden)
        self.w_hh = Linear(n_hidden, n_hidden)
        self.w_ho = Linear(n_hidden, n_output)

        self.parameters += self.w_ih.get_parameters()
        self.parameters += self.w_hh.get_parameters()
        self.parameters += self.w_ho.get_parameters()

    def forward(self, input, hidden):
        from_prev_hidden = self.w_hh.forward(hidden)
        combined = self.w_ih.forward(input) + from_prev_hidden
        new_hidden = self.activation.forward(combined)
        output = self.w_ho.forward(new_hidden)
        return output, new_hidden

    def init_hidden(self, batch_size=1):
        return Tensor(np.zeros((batch_size, self.n_hidden)), autograd=True)


class LSTMCell(Layer):

    def __init__(self, n_inputs, n_hidden, n_output):
        super().__init__()

        self.n_inputs = n_inputs
        self.n_hidden = n_hidden
        self.n_output = n_output

        # input-to-gate projections
        self.xf = Linear(n_inputs, n_hidden)
        self.xi = Linear(n_inputs, n_hidden)
        self.xo = Linear(n_inputs, n_hidden)
        self.xc = Linear(n_inputs, n_hidden)

        # hidden-to-gate projections (no bias; the x* layers already carry one)
        self.hf = Linear(n_hidden, n_hidden, bias=False)
        self.hi = Linear(n_hidden, n_hidden, bias=False)
        self.ho = Linear(n_hidden, n_hidden, bias=False)
        self.hc = Linear(n_hidden, n_hidden, bias=False)

        self.w_ho = Linear(n_hidden, n_output, bias=False)

        self.parameters += self.xf.get_parameters()
        self.parameters += self.xi.get_parameters()
        self.parameters += self.xo.get_parameters()
        self.parameters += self.xc.get_parameters()

        self.parameters += self.hf.get_parameters()
        self.parameters += self.hi.get_parameters()
        self.parameters += self.ho.get_parameters()
        self.parameters += self.hc.get_parameters()

        self.parameters += self.w_ho.get_parameters()

    def forward(self, input, hidden):
        prev_hidden = hidden[0]
        prev_cell = hidden[1]

        f = (self.xf.forward(input) + self.hf.forward(prev_hidden)).sigmoid()  # forget gate
        i = (self.xi.forward(input) + self.hi.forward(prev_hidden)).sigmoid()  # input gate
        o = (self.xo.forward(input) + self.ho.forward(prev_hidden)).sigmoid()  # output gate
        g = (self.xc.forward(input) + self.hc.forward(prev_hidden)).tanh()     # candidate cell
        c = (f * prev_cell) + (i * g)
        h = o * c.tanh()

        output = self.w_ho.forward(h)
        return output, (h, c)

    def init_hidden(self, batch_size=1):
        init_hidden = Tensor(np.zeros((batch_size, self.n_hidden)), autograd=True)
        init_cell = Tensor(np.zeros((batch_size, self.n_hidden)), autograd=True)
        init_hidden.data[:, 0] += 1
        init_cell.data[:, 0] += 1
        return (init_hidden, init_cell)
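# --- Optional shape check (not in the original listing; sizes are illustrative) ---
# A single LSTMCell step: the hidden state is a (hidden, cell) tuple, and the
# output holds one score per output unit for each row in the batch.
#
#   cell = LSTMCell(n_inputs=8, n_hidden=8, n_output=5)
#   h0 = cell.init_hidden(batch_size=4)              # (hidden, cell), each 4x8
#   x = Tensor(np.random.randn(4, 8), autograd=True)
#   out, h1 = cell.forward(x, h0)
#   print(out.data.shape)                            # (4, 5)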
import sys, random, math
from collections import Counter

np.random.seed(0)

# dataset from http://karpathy.github.io/2015/05/21/rnn-effectiveness/
# f = open('shakespear.txt','r')
f = open('shakesper.txt', 'r')
raw = f.read()
f.close()

vocab = list(set(raw))
word2index = {}
for i, word in enumerate(vocab):
    word2index[word] = i
indices = np.array(list(map(lambda x: word2index[x], raw)))

embed = Embedding(vocab_size=len(vocab), dim=512)
model = LSTMCell(n_inputs=512, n_hidden=512, n_output=len(vocab))
model.w_ho.weight.data *= 0

criterion = CrossEntropyLoss()
optim = SGD(parameters=model.get_parameters() + embed.get_parameters(), alpha=0.05)


def generate_sample(n=30, init_char=' '):
    s = ""
    hidden = model.init_hidden(batch_size=1)
    input = Tensor(np.array([word2index[init_char]]))
    for i in range(n):
        rnn_input = embed.forward(input)
        output, hidden = model.forward(input=rnn_input, hidden=hidden)
        # output.data *= 25
        # temp_dist = output.softmax()
        # temp_dist /= temp_dist.sum()
        # m = (temp_dist > np.random.rand()).argmax()
        m = output.data.argmax()
        c = vocab[m]
        input = Tensor(np.array([m]))
        s += c
    return s


batch_size = 16
bptt = 25
n_batches = int((indices.shape[0] / (batch_size)))

trimmed_indices = indices[:n_batches * batch_size]
batched_indices = trimmed_indices.reshape(batch_size, n_batches).transpose()

input_batched_indices = batched_indices[0:-1]
target_batched_indices = batched_indices[1:]

n_bptt = int(((n_batches - 1) / bptt))
input_batches = input_batched_indices[:n_bptt * bptt].reshape(n_bptt, bptt, batch_size)
target_batches = target_batched_indices[:n_bptt * bptt].reshape(n_bptt, bptt, batch_size)

min_loss = 1000


def train(iterations=40):  # iterations=400
    global min_loss  # declare min_loss as a global variable
    for iter in range(iterations):
        total_loss = 0
        n_loss = 0

        hidden = model.init_hidden(batch_size=batch_size)
        batches_to_train = len(input_batches)
        # batches_to_train = 32
        for batch_i in range(batches_to_train):

            # detach the hidden state so gradients do not flow past this batch
            hidden = (Tensor(hidden[0].data, autograd=True),
                      Tensor(hidden[1].data, autograd=True))

            losses = list()
            for t in range(bptt):
                input = Tensor(input_batches[batch_i][t], autograd=True)
                rnn_input = embed.forward(input=input)
                output, hidden = model.forward(input=rnn_input, hidden=hidden)

                target = Tensor(target_batches[batch_i][t], autograd=True)
                batch_loss = criterion.forward(output, target)

                if (t == 0):
                    losses.append(batch_loss)
                else:
                    losses.append(batch_loss + losses[-1])

            loss = losses[-1]

            loss.backward()
            optim.step()
            total_loss += loss.data / bptt

            epoch_loss = np.exp(total_loss / (batch_i + 1))
            if (epoch_loss < min_loss):
                min_loss = epoch_loss
                print()

            log = "\r Iter:" + str(iter)
            log += " - Alpha:" + str(optim.alpha)[0:5]
            log += " - Batch " + str(batch_i + 1) + "/" + str(len(input_batches))
            log += " - Min Loss:" + str(min_loss)[0:5]
            log += " - Loss:" + str(epoch_loss)
            if (batch_i == 0):
                log += " - " + generate_sample(n=70, init_char='T').replace("\n", " ")
            if (batch_i % 1 == 0):
                sys.stdout.write(log)
        optim.alpha *= 0.99


train(5)
# train(10)
print(generate_sample(n=500, init_char='\n'))

'''
Iter:0 - Alpha:0.05 - Batch 2/2788 - Min Loss:64.99 - Loss:65.00011100353602 Iter:0 - Alpha:0.05 - Batch 3/2788 - Min Loss:64.97 - Loss:64.97125482131345 Iter:0 - Alpha:0.05 - Batch 4/2788 - Min Loss:64.92 - Loss:64.9255137563867 Iter:0 - Alpha:0.05 - Batch 5/2788 - Min Loss:64.81 - Loss:64.81780538122808 Iter:0 - Alpha:0.05 -
Batch 6/2788 - Min Loss:64.59 - Loss:64.59206297117493 Iter:0 - Alpha:0.05 - Batch 7/2788 - Min Loss:64.21 - Loss:64.21087681480775 Iter:0 - Alpha:0.05 - Batch 8/2788 - Min Loss:63.38 - Loss:63.3836649986421 Iter:0 - Alpha:0.05 - Batch 9/2788 - Min Loss:61.87 - Loss:61.87748115624768 Iter:0 - Alpha:0.05 - Batch 10/2788 - Min Loss:59.30 - Loss:59.30033165407521 Iter:0 - Alpha:0.05 - Batch 11/2788 - Min Loss:56.62 - Loss:56.62692002039265 Iter:0 - Alpha:0.05 - Batch 12/2788 - Min Loss:53.64 - Loss:53.64444557804619 Iter:0 - Alpha:0.05 - Batch 13/2788 - Min Loss:51.78 - Loss:51.780488768462384 Iter:0 - Alpha:0.05 - Batch 14/2788 - Min Loss:51.76 - Loss:51.76064561154201 Iter:0 - Alpha:0.05 - Batch 15/2788 - Min Loss:50.75 - Loss:50.75290221773698 Iter:0 - Alpha:0.05 - Batch 16/2788 - Min Loss:49.03 - Loss:49.03450085728163 Iter:0 - Alpha:0.05 - Batch 17/2788 - Min Loss:47.77 - Loss:47.77659027936774 Iter:0 - Alpha:0.05 - Batch 18/2788 - Min Loss:47.76 - Loss:47.769863520076136 Iter:0 - Alpha:0.05 - Batch 19/2788 - Min Loss:46.88 - Loss:46.8847505554376 Iter:0 - Alpha:0.05 - Batch 20/2788 - Min Loss:45.48 - Loss:45.48606082600269 Iter:0 - Alpha:0.05 - Batch 22/2788 - Min Loss:44.50 - Loss:44.60101243058189 Iter:0 - Alpha:0.05 - Batch 23/2788 - Min Loss:43.69 - Loss:43.696149757572336 Iter:0 - Alpha:0.05 - Batch 24/2788 - Min Loss:42.92 - Loss:42.929997147269525 Iter:0 - Alpha:0.05 - Batch 25/2788 - Min Loss:42.70 - Loss:42.701717195302756 Iter:0 - Alpha:0.05 - Batch 26/2788 - Min Loss:42.21 - Loss:42.21611881004273 Iter:0 - Alpha:0.05 - Batch 27/2788 - Min Loss:41.38 - Loss:41.38657873810713 Iter:0 - Alpha:0.05 - Batch 28/2788 - Min Loss:41.24 - Loss:41.24750360990076 Iter:0 - Alpha:0.05 - Batch 29/2788 - Min Loss:40.68 - Loss:40.68153692034208 Iter:0 - Alpha:0.05 - Batch 30/2788 - Min Loss:40.03 - Loss:40.03476921197549 Iter:0 - Alpha:0.05 - Batch 31/2788 - Min Loss:39.54 - Loss:39.54761249019825 Iter:0 - Alpha:0.05 - Batch 32/2788 - Min Loss:39.36 - Loss:39.362854145263974 Iter:0 - Alpha:0.05 - Batch 33/2788 - Min Loss:38.98 - Loss:38.983527840348266 Iter:0 - Alpha:0.05 - Batch 34/2788 - Min Loss:38.86 - Loss:38.86414351625717 Iter:0 - Alpha:0.05 - Batch 35/2788 - Min Loss:38.36 - Loss:38.36348682993047 Iter:0 - Alpha:0.05 - Batch 36/2788 - Min Loss:38.00 - Loss:38.00899719388536 Iter:0 - Alpha:0.05 - Batch 37/2788 - Min Loss:37.66 - Loss:37.66470420694178 Iter:0 - Alpha:0.05 - Batch 38/2788 - Min Loss:37.58 - Loss:37.58118032354363 Iter:0 - Alpha:0.05 - Batch 39/2788 - Min Loss:37.24 - Loss:37.24981887831019 Iter:0 - Alpha:0.05 - Batch 40/2788 - Min Loss:36.97 - Loss:36.97107029041341 Iter:0 - Alpha:0.05 - Batch 41/2788 - Min Loss:36.96 - Loss:36.96131096412938 Iter:0 - Alpha:0.05 - Batch 42/2788 - Min Loss:36.76 - Loss:36.76969259672902 Iter:0 - Alpha:0.05 - Batch 43/2788 - Min Loss:36.52 - Loss:36.52076641120323 Iter:0 - Alpha:0.05 - Batch 44/2788 - Min Loss:36.42 - Loss:36.42610153555301 Iter:0 - Alpha:0.05 - Batch 45/2788 - Min Loss:36.23 - Loss:36.23380831357556 Iter:0 - Alpha:0.05 - Batch 46/2788 - Min Loss:36.04 - Loss:36.044056917178 Iter:0 - Alpha:0.05 - Batch 47/2788 - Min Loss:35.77 - Loss:35.77064092517939 Iter:0 - Alpha:0.05 - Batch 48/2788 - Min Loss:35.56 - Loss:35.56084237253776 Iter:0 - Alpha:0.05 - Batch 49/2788 - Min Loss:35.37 - Loss:35.37191870559778 Iter:0 - Alpha:0.05 - Batch 50/2788 - Min Loss:35.29 - Loss:35.291635003489695 Iter:0 - Alpha:0.05 - Batch 51/2788 - Min Loss:35.16 - Loss:35.16507128305489 Iter:0 - Alpha:0.05 - Batch 52/2788 - Min Loss:34.93 - 
Loss:34.932042536211426 Iter:0 - Alpha:0.05 - Batch 53/2788 - Min Loss:34.66 - Loss:34.66460440344218 Iter:0 - Alpha:0.05 - Batch 54/2788 - Min Loss:34.43 - Loss:34.43088580889503 Iter:0 - Alpha:0.05 - Batch 55/2788 - Min Loss:34.17 - Loss:34.17781863800815 Iter:0 - Alpha:0.05 - Batch 56/2788 - Min Loss:34.00 - Loss:34.00397143855258 Iter:0 - Alpha:0.05 - Batch 57/2788 - Min Loss:33.78 - Loss:33.78768619711431 Iter:0 - Alpha:0.05 - Batch 58/2788 - Min Loss:33.54 - Loss:33.540391102459814 Iter:0 - Alpha:0.05 - Batch 59/2788 - Min Loss:33.41 - Loss:33.41414268739367 Iter:0 - Alpha:0.05 - Batch 60/2788 - Min Loss:33.28 - Loss:33.287004981875725 Iter:0 - Alpha:0.05 - Batch 61/2788 - Min Loss:33.11 - Loss:33.119700025458904 Iter:0 - Alpha:0.05 - Batch 62/2788 - Min Loss:32.93 - Loss:32.93274327439954 Iter:0 - Alpha:0.05 - Batch 63/2788 - Min Loss:32.75 - Loss:32.7517268790668 Iter:0 - Alpha:0.05 - Batch 64/2788 - Min Loss:32.52 - Loss:32.52533806696676 Iter:0 - Alpha:0.05 - Batch 65/2788 - Min Loss:32.32 - Loss:32.32143594083988 Iter:0 - Alpha:0.05 - Batch 66/2788 - Min Loss:32.25 - Loss:32.25157933217092 Iter:0 - Alpha:0.05 - Batch 67/2788 - Min Loss:32.09 - Loss:32.09264863723831 Iter:0 - Alpha:0.05 - Batch 68/2788 - Min Loss:31.97 - Loss:31.97688730252501 Iter:0 - Alpha:0.05 - Batch 69/2788 - Min Loss:31.82 - Loss:31.82220380310978 Iter:0 - Alpha:0.05 - Batch 70/2788 - Min Loss:31.69 - Loss:31.690526212828654 Iter:0 - Alpha:0.05 - Batch 71/2788 - Min Loss:31.63 - Loss:31.632962228439784 Iter:0 - Alpha:0.05 - Batch 72/2788 - Min Loss:31.48 - Loss:31.486156544089567 Iter:0 - Alpha:0.05 - Batch 73/2788 - Min Loss:31.24 - Loss:31.24954104552075 Iter:0 - Alpha:0.05 - Batch 74/2788 - Min Loss:31.08 - Loss:31.084010394152997 Iter:0 - Alpha:0.05 - Batch 75/2788 - Min Loss:30.93 - Loss:30.930785630062335 Iter:0 - Alpha:0.05 - Batch 76/2788 - Min Loss:30.77 - Loss:30.770844418083385 Iter:0 - Alpha:0.05 - Batch 77/2788 - Min Loss:30.55 - Loss:30.553141536884446 Iter:0 - Alpha:0.05 - Batch 79/2788 - Min Loss:30.44 - Loss:30.446381601158148 Iter:0 - Alpha:0.05 - Batch 80/2788 - Min Loss:30.31 - Loss:30.313430659093218 Iter:0 - Alpha:0.05 - Batch 81/2788 - Min Loss:30.17 - Loss:30.17685423526397 Iter:0 - Alpha:0.05 - Batch 82/2788 - Min Loss:30.03 - Loss:30.034068418194238 Iter:0 - Alpha:0.05 - Batch 83/2788 - Min Loss:29.86 - Loss:29.869799763537227 Iter:0 - Alpha:0.05 - Batch 84/2788 - Min Loss:29.71 - Loss:29.71315410265161 Iter:0 - Alpha:0.05 - Batch 85/2788 - Min Loss:29.62 - Loss:29.626194150081712 Iter:0 - Alpha:0.05 - Batch 86/2788 - Min Loss:29.51 - Loss:29.51259555618696 Iter:0 - Alpha:0.05 - Batch 88/2788 - Min Loss:29.42 - Loss:29.42060495535658 Iter:0 - Alpha:0.05 - Batch 89/2788 - Min Loss:29.32 - Loss:29.32396757332214 Iter:0 - Alpha:0.05 - Batch 90/2788 - Min Loss:29.12 - Loss:29.127034538647223 Iter:0 - Alpha:0.05 - Batch 91/2788 - Min Loss:28.99 - Loss:28.99104613092588 Iter:0 - Alpha:0.05 - Batch 92/2788 - Min Loss:28.93 - Loss:28.93157053340792 Iter:0 - Alpha:0.05 - Batch 93/2788 - Min Loss:28.85 - Loss:28.850017228708708 Iter:0 - Alpha:0.05 - Batch 94/2788 - Min Loss:28.72 - Loss:28.72971036113448 Iter:0 - Alpha:0.05 - Batch 95/2788 - Min Loss:28.57 - Loss:28.572422584455435 Iter:0 - Alpha:0.05 - Batch 96/2788 - Min Loss:28.44 - Loss:28.444473832731003 Iter:0 - Alpha:0.05 - Batch 97/2788 - Min Loss:28.32 - Loss:28.329838082768866 Iter:0 - Alpha:0.05 - Batch 98/2788 - Min Loss:28.22 - Loss:28.227999934535212 Iter:0 - Alpha:0.05 - Batch 99/2788 - Min Loss:28.15 - Loss:28.156615920158814 
Iter:0 - Alpha:0.05 - Batch 100/2788 - Min Loss:28.08 - Loss:28.084682372851002 Iter:1 - Alpha:0.049 - Batch 2788/2788 - Min Loss:7.869 - Loss:7.869545419657005 Iter:2 - Alpha:0.049 - Batch 2788/2788 - Min Loss:7.125 - Loss:7.133998443189006 Iter:3 - Alpha:0.048 - Batch 2788/2788 - Min Loss:6.584 - Loss:6.771658681459316 Iter:4 - Alpha:0.048 - Batch 2788/2788 - Min Loss:6.482 - Loss:6.5734656052792495The forther with my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my 进程已结束,退出代码为 0 '''
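The commented-out lines in generate_sample() hint at sampling from the output distribution instead of always taking the argmax, which tends to break the repetitive "for the make my" loops visible in the quoted output. Below is a minimal sketch of that idea with a temperature parameter, reusing the model, embed, vocab and word2index globals defined above; the function name and the 0.7 temperature are my own illustrative choices, not part of the original program.

def generate_sample_temperature(n=200, init_char='\n', temperature=0.7):
    # Sample the next character from the softmax over the output scores,
    # sharpened by a temperature, instead of taking a hard argmax.
    s = ""
    hidden = model.init_hidden(batch_size=1)
    inp = Tensor(np.array([word2index[init_char]]))
    for _ in range(n):
        rnn_input = embed.forward(inp)
        output, hidden = model.forward(input=rnn_input, hidden=hidden)
        logits = output.data.flatten() / temperature
        logits -= logits.max()                        # numerical stability
        probs = np.exp(logits) / np.exp(logits).sum()
        m = np.random.choice(len(vocab), p=probs)
        s += vocab[m]
        inp = Tensor(np.array([m]))
    return s

# print(generate_sample_temperature(n=500, init_char='\n'))

Lower temperatures make the sampling closer to argmax; higher temperatures produce more varied but noisier text.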