TensorFlow: Error when initializing a bidirectional RNN with an LSTM cell

Created on 18 Jan 2016 · 3 Comments · Source: tensorflow/tensorflow

I want to build a bidirectional RNN model with LSTM cells in TensorFlow. When I try to initialize bidirectional_rnn, it gives:

ValueError: Over-sharing: Variable BiRNN_FW/RNN/BasicLSTMCell/Linear/Matrix already exists, disallowed. Did you mean to set reuse=True in VarScope?

import tensorflow as tf
from tensorflow.models.rnn import rnn, rnn_cell
from tensorflow.python.ops.constant_op import constant
import numpy as np

class Model(object):
    def __init__(self, batch_size, len_word, num_chars, dim_embed, dim_hidden):
        self.batch_size = batch_size
        self.dim_embed = dim_embed
        self.dim_hidden = dim_hidden
        self.num_chars = num_chars
        self.len_word = len_word

        with tf.device("/cpu:0"):
            self.embedding = tf.Variable(tf.random_uniform([num_chars, dim_embed], -0.1, 0.1), name='embedding')

        self.W_emb = tf.Variable(tf.random_uniform([dim_hidden*2, dim_embed], -0.1, 0.1), name='W_emb')
        self.b_emb = tf.Variable(tf.zeros([dim_embed]), name='b_emb')
        self.lstm_fw_cell = rnn_cell.BasicLSTMCell(dim_hidden)
        self.lstm_bw_cell = rnn_cell.BasicLSTMCell(dim_hidden)

    def build_model(self):
        inputs = tf.placeholder(tf.int32, [self.batch_size, self.len_word])
        input_length = tf.placeholder(tf.int64, [self.batch_size])
        lstm_state_fw = self.lstm_fw_cell.zero_state(self.batch_size, tf.float32)
        lstm_state_bw = self.lstm_bw_cell.zero_state(self.batch_size, tf.float32)

        with tf.device("/cpu:0"):
            embedded_input = tf.nn.embedding_lookup(self.embedding, tf.transpose(inputs))

        brnn_output = rnn.bidirectional_rnn(
            self.lstm_fw_cell, self.lstm_bw_cell,
            tf.unpack(embedded_input),
            sequence_length=input_length,
            initial_state_fw=lstm_state_fw,
            initial_state_bw=lstm_state_bw,
        )

        pooled_output = tf.reduce_sum(tf.pack(brnn_output), 0)
        pooled_output = pooled_output / tf.expand_dims(tf.to_float(input_length) + 1e-6, 1)
        final_emb = tf.nn.xw_plus_b(pooled_output, self.W_emb, self.b_emb)
        final_emb = tf.nn.l2_normalize(final_emb, dim=1, epsilon=1e-7)

        return final_emb

All 3 comments

You need to specify different variable scopes for the LSTM cells.

with tf.variable_scope('forward'):
    self.lstm_fw_cell = rnn_cell.BasicLSTMCell(dim_hidden)   
with tf.variable_scope('backward'):
    self.lstm_bw_cell = rnn_cell.BasicLSTMCell(dim_hidden)

Otherwise there is a name collision (both cells try to create the variable "BiRNN_FW/RNN/BasicLSTMCell/Linear/Matrix"), and TensorFlow interprets this as an attempt to share the parameters between the two cells, which is not what you want. It throws an exception because you haven't explicitly told it to reuse variables in the second scope with variable_scope(name, reuse=True).

Setting the variable scopes, as above, will create unique names for the variables:
BiRNN_FW/RNN/BasicLSTMCell/forward/Linear/Matrix
BiRNN_FW/RNN/BasicLSTMCell/backward/Linear/Matrix

Read the Sharing Variables guide for more information.
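
For reference, a minimal sketch of what intentional sharing looks like (this is not from the thread; the scope name shared_rnn and variable name Matrix are made up for illustration). Omitting reuse=True on the second scope raises the same "already exists ... Did you mean to set reuse=True in VarScope?" ValueError quoted above.

import tensorflow as tf

with tf.variable_scope('shared_rnn'):
    # First use creates the variable.
    matrix = tf.get_variable('Matrix', shape=[128, 512])

with tf.variable_scope('shared_rnn', reuse=True):
    # Second use explicitly opts in to sharing; without reuse=True this
    # raises ValueError: Variable shared_rnn/Matrix already exists.
    same_matrix = tf.get_variable('Matrix', shape=[128, 512])

assert matrix is same_matrix  # both names resolve to the same variable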

Note that this no longer happens: the variables are created inside ...Cell.__call__ rather than in ...Cell.__init__, so no scope is needed around the construction of the cells, and bidirectional_rnn handles the variable scoping itself, so you don't need to scope it yourself.
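
As a rough sketch of that newer behaviour, assuming a TF 1.x-style setup where the cells live under tf.contrib.rnn and tf.nn.bidirectional_dynamic_rnn is available (the shapes below are made up), the two cells can be built without any explicit scoping:

import tensorflow as tf

batch_size, max_time, dim_input, dim_hidden = 32, 20, 64, 128
inputs = tf.placeholder(tf.float32, [batch_size, max_time, dim_input])
lengths = tf.placeholder(tf.int32, [batch_size])

# No variable_scope wrappers: each cell creates its variables when it is
# first called, inside the fw/bw scopes that bidirectional_dynamic_rnn opens.
cell_fw = tf.contrib.rnn.BasicLSTMCell(dim_hidden)
cell_bw = tf.contrib.rnn.BasicLSTMCell(dim_hidden)

(out_fw, out_bw), (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
    cell_fw, cell_bw, inputs, sequence_length=lengths, dtype=tf.float32)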

I'm still having the same issue. Any suggestions? I have tried the approach proposed by salomons, with the same results. It doesn't return any tuple as a result.
((encoder_fw_outputs, encoder_bw_outputs),
 (encoder_fw_final_state, encoder_bw_final_state)) = (
    tf.nn.bidirectional_dynamic_rnn(cell_fw=encoder_cell,
                                    cell_bw=encoder_cell,
                                    inputs=encoder_inputs_embedded,
                                    sequence_length=encoder_inputs_length,
                                    dtype=tf.float64, time_major=True)
)

ValueError Traceback (most recent call last)
in ()
20 inputs=encoder_inputs_embedded,
21 sequence_length=encoder_inputs_length,
---> 22 dtype=tf.float32, time_major=True)
23 )
24

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc in bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length, initial_state_fw, initial_state_bw, dtype, parallel_iterations, swap_memory, time_major, scope)
348 initial_state=initial_state_fw, dtype=dtype,
349 parallel_iterations=parallel_iterations, swap_memory=swap_memory,
--> 350 time_major=time_major, scope=fw_scope)
351
352 # Backward direction

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc in dynamic_rnn(cell, inputs, sequence_length, initial_state, dtype, parallel_iterations, swap_memory, time_major, scope)
544 swap_memory=swap_memory,
545 sequence_length=sequence_length,
--> 546 dtype=dtype)
547
548 # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth].

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc in _dynamic_rnn_loop(cell, inputs, initial_state, parallel_iterations, swap_memory, sequence_length, dtype)
711 loop_vars=(time, output_ta, state),
712 parallel_iterations=parallel_iterations,
--> 713 swap_memory=swap_memory)
714
715 # Unpack final output if not using output tuples.

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.pyc in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name)
2603 context = WhileContext(parallel_iterations, back_prop, swap_memory, name)
2604 ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, context)
-> 2605 result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
2606 return result
2607

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.pyc in BuildLoop(self, pred, body, loop_vars, shape_invariants)
2436 self.Enter()
2437 original_body_result, exit_vars = self._BuildLoop(
-> 2438 pred, body, original_loop_vars, loop_vars, shape_invariants)
2439 finally:
2440 self.Exit()

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.pyc in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
2386 structure=original_loop_vars,
2387 flat_sequence=vars_for_body_with_tensor_arrays)
-> 2388 body_result = body(*packed_vars_for_body)
2389 if not nest.is_sequence(body_result):
2390 body_result = [body_result]

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc in _time_step(time, output_ta_t, state)
694 call_cell=call_cell,
695 state_size=state_size,
--> 696 skip_conditionals=True)
697 else:
698 (output, new_state) = call_cell()

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc in _rnn_step(time, sequence_length, min_sequence_length, max_sequence_length, zero_output, state, call_cell, state_size, skip_conditionals)
175 # steps. This is faster when max_seq_len is equal to the number of unrolls
176 # (which is typical for dynamic_rnn).
--> 177 new_output, new_state = call_cell()
178 nest.assert_same_structure(state, new_state)
179 new_state = nest.flatten(new_state)

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc in ()
682
683 input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t)
--> 684 call_cell = lambda: cell(input_t, state)
685
686 if sequence_length is not None:

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.pyc in __call__(self, inputs, state, scope)
336 # i = input_gate, j = new_input, f = forget_gate, o = output_gate
337 lstm_matrix = _linear([inputs, m_prev], 4 * self._num_units, bias=True,
--> 338 scope=scope)
339 i, j, f, o = array_ops.split(
340 value=lstm_matrix, num_or_size_splits=4, axis=1)

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.pyc in _linear(args, output_size, bias, bias_start, scope)
745 with vs.variable_scope(scope) as outer_scope:
746 weights = vs.get_variable(
--> 747 "weights", [total_arg_size, output_size], dtype=dtype)
748 if len(args) == 1:
749 res = math_ops.matmul(args[0], weights)

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in get_variable(name, shape, dtype, initializer, regularizer, trainable, collections, caching_device, partitioner, validate_shape, custom_getter)
986 collections=collections, caching_device=caching_device,
987 partitioner=partitioner, validate_shape=validate_shape,
--> 988 custom_getter=custom_getter)
989 get_variable_or_local_docstring = (
990 """%s

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in get_variable(self, var_store, name, shape, dtype, initializer, regularizer, trainable, collections, caching_device, partitioner, validate_shape, custom_getter)
888 collections=collections, caching_device=caching_device,
889 partitioner=partitioner, validate_shape=validate_shape,
--> 890 custom_getter=custom_getter)
891
892 def _get_partitioned_variable(self,

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in get_variable(self, name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape, custom_getter)
346 reuse=reuse, trainable=trainable, collections=collections,
347 caching_device=caching_device, partitioner=partitioner,
--> 348 validate_shape=validate_shape)
349
350 def _get_partitioned_variable(

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in _true_getter(name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape)
331 initializer=initializer, regularizer=regularizer, reuse=reuse,
332 trainable=trainable, collections=collections,
--> 333 caching_device=caching_device, validate_shape=validate_shape)
334
335 if custom_getter is not None:

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in _get_single_variable(self, name, shape, dtype, initializer, regularizer, partition_info, reuse, trainable, collections, caching_device, validate_shape)
637 " Did you mean to set reuse=True in VarScope? "
638 "Originally defined at:\n\n%s" % (
--> 639 name, "".join(traceback.format_list(tb))))
640 found_var = self._vars[name]
641 if not shape.is_compatible_with(found_var.get_shape()):

ValueError: Variable bidirectional_rnn/fw/lstm_cell/weights already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:

File "/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py", line 747, in _linear
"weights", [total_arg_size, output_size], dtype=dtype)
File "/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py", line 338, in __call__
scope=scope)
File "", line 24, in
time_major=True
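
One thing worth checking here, as an assumption on my part since the surrounding notebook code isn't shown: the trace comes from an interactive cell, and "Variable bidirectional_rnn/fw/lstm_cell/weights already exists" is also what you get when the graph-building cell is simply run more than once against the same default graph. A minimal guard is sketched below; build_graph is a hypothetical wrapper around the bidirectional_dynamic_rnn snippet above.

import tensorflow as tf

def build_graph():
    # ... placeholders, encoder_cell and bidirectional_dynamic_rnn go here ...
    pass

# Start from an empty default graph before rebuilding, so re-running the
# notebook cell does not trip "Variable ... already exists". Building inside
# an explicit tf.Graph() achieves the same thing.
tf.reset_default_graph()
build_graph()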
