2016-04-23 4 views
1

Я использую код, вставленный ниже. «Прямая» часть кода, похоже, работает в силу «assert root_emb == 1 + emb [0] * emb [1]». Однако после того, как будет сделан шаг обучения (строка, следующая за утверждением), появляется странная ошибка, указывающая на проблему с TensorArray, написанную во время цикла wihle.Странная ошибка при принятии градиента TensorArray

tensorflow.python.framework.errors.InvalidArgumentError: TensorArray [email protected]: Could not read from TensorArray index 2 because it has not yet been written to. [[Node: gradients/while/TensorArrayWrite_grad/TensorArrayRead = TensorArrayRead[_class=["loc:@TensorArray"], dtype=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/while/TensorArrayWrite_grad/TensorArrayGrad/TensorArrayGrad, gradients/while/TensorArrayWrite_grad/TensorArrayRead/StackPop, gradients/while/TensorArrayWrite_grad/TensorArrayGrad/gradient_flow)]] Caused by op u'gradients/while/TensorArrayWrite_grad/TensorArrayRead', defined at: File "minimal.py", line 82, in model = TreeRNN(8, 1, 1, degree=2) File "minimal.py", line 61, in init self.grad = tf.gradients(self.loss, self.params) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gradients.py", line 481, in gradients in_grads = _AsList(grad_fn(op, *out_grads)) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_grad.py", line 115, in _TensorArrayWriteGrad grad = g.read(index) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_ops.py", line 177, in read dtype=self._dtype, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 781, in _tensor_array_read flow_in=flow_in, dtype=dtype, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/op_def_library.py", line 694, in apply_op op_def=op_def) File "/Library/Python/2.7/site-packages/tensorflow/python/framework/ops.py", line 2154, in create_op original_op=self._default_original_op, op_def=op_def) File "/Library/Python/2.7/site-packages/tensorflow/python/framework/ops.py", line 1154, in init self._traceback = _extract_stack()

...which was originally created as op u'while/TensorArrayWrite', defined at: File "minimal.py", line 82, in model = TreeRNN(8, 1, 1, degree=2) File "minimal.py", line 50, in init loop_vars=(self.time, node_emb, tf.zeros([1]))) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 1681, in While back_prop=back_prop, swap_memory=swap_memory, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 1671, in while_loop result = context.BuildLoop(cond, body, loop_vars) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 1572, in BuildLoop body_result = body(*vars_for_body_with_tensor_arrays) File "minimal.py", line 43, in _recurrence new_node_emb = node_emb.write(children_and_parent[-1], parent_emb) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_ops.py", line 200, in write name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 875, in _tensor_array_write value=value, flow_in=flow_in, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/op_def_library.py", line 694, in apply_op op_def=op_def)

import numpy as np 
import tensorflow as tf 
from tensorflow.python.ops import tensor_array_ops, control_flow_ops 


class TreeRNN(object): 

    def __init__(self, num_emb, emb_dim, output_dim, degree=2, learning_rate=0.01): 
     self.num_emb = num_emb 
     self.emb_dim = emb_dim 
     self.output_dim = output_dim 
     self.degree= degree 
     self.learning_rate = tf.Variable(float(learning_rate), trainable=False) 

     self.embeddings = tf.Variable(self.init_matrix([self.num_emb, self.emb_dim])) 
     self.recursive_unit = self.create_recursive_unit() 
     self.W_out = tf.Variable(self.init_matrix([self.output_dim, self.emb_dim])) 
     self.b_out = tf.Variable(self.init_vector([self.output_dim])) 

     self.x = tf.placeholder(tf.int32, shape=[None]) # word indices 
     self.tree = tf.placeholder(tf.int32, shape=[None, self.degree + 1]) 
     self.y = tf.placeholder(tf.float32, shape=[self.output_dim]) 

     num_words, = tf.unpack(tf.shape(self.x), 1) # also num leaves 
     emb_x = tf.gather(self.embeddings, self.x) 
     node_emb = tensor_array_ops.TensorArray(
      dtype=tf.float32, size=num_words - 1, dynamic_size=True, 
      clear_after_read=False) 
     node_emb = node_emb.unpack(emb_x) 

     num_nodes, _ = tf.unpack(tf.shape(self.tree), 2) # num internal nodes 
     tree_traversal = tensor_array_ops.TensorArray(
      dtype=tf.int32, size=num_nodes) 
     tree_traversal = tree_traversal.unpack(self.tree) 

     def _recurrence(t, node_emb, _): 
      node_info = tree_traversal.read(t) 
      children_and_parent = tf.unpack(node_info, self.degree + 1) 
      child_emb = [] 
      for i in xrange(self.degree): 
       child_emb.append(node_emb.read(children_and_parent[i])) 
      parent_emb = self.recursive_unit(child_emb) 
      new_node_emb = node_emb.write(children_and_parent[-1], parent_emb) 
      return t + 1, new_node_emb, parent_emb 

     self.time = tf.constant(0, dtype=tf.int32, name='time') 
     _, _, final_emb = control_flow_ops.While(
      cond=lambda t, _1, _2: t < num_nodes, 
      body=_recurrence, 
      loop_vars=(self.time, node_emb, tf.zeros([1]))) 

     self.final_state = final_emb 

     self.pred_y = self.activation(
      tf.matmul(self.W_out, tf.reshape(self.final_state, [self.emb_dim, 1])) 
      + self.b_out) 
     self.loss = self.loss_fn(self.y, self.pred_y) 

     self.params = tf.trainable_variables() 
     opt = tf.train.GradientDescentOptimizer(self.learning_rate) 
     self.grad = tf.gradients(self.loss, self.params) 
     self.updates = opt.apply_gradients(zip(self.grad, self.params)) 

    def init_matrix(self, shape): 
     return tf.random_normal(shape, stddev=0.1) 

    def init_vector(self, shape): 
     return tf.zeros(shape) 

    def create_recursive_unit(self): 
     def unit(child_emb): # very simple 
      return 1 + child_emb[0] * child_emb[1] 
     return unit 

    def activation(self, inp): 
     return tf.sigmoid(inp) 

    def loss_fn(self, y, pred_y): 
     return tf.reduce_sum(tf.square(y - pred_y)) 


model = TreeRNN(8, 1, 1, degree=2) 
sess = tf.Session() 
sess.run(tf.initialize_all_variables()) 

root_emb = sess.run([model.final_state], 
        feed_dict={model.x: np.array([0, 1]), model.tree: np.array([[0, 1, 2]])}) 
emb, = sess.run([model.embeddings]) 
assert root_emb == 1 + emb[0] * emb[1] 

out = sess.run([model.updates, model.loss], 
       feed_dict={model.x: np.array([0, 1]), 
          model.tree: np.array([[0, 1, 2]]), 
          model.y: np.array([0])}) 
+0

Звучит как ошибка, может подать вопрос на GitHub, так что может быть имущество отбирали раненых? –

+0

Хорошо - сделаю. Знаете ли вы какие-нибудь возможные обходные пути? Кажется, что код может работать без цикла while (записывая итерации), так что это может быть одна вещь, которую я могу сделать. –

ответ

0

набор parallel_iterations = 1 в tf.while_loop

Смежные вопросы