Tensorflow: Error when initializing a bidirectional RNN with LSTM cells

Created on 2016-01-18  ·  3 comments  ·  Source: tensorflow/tensorflow

I want to build a bi-directional RNN model with LSTM cells in TensorFlow. When I try to initialize the bidirectional_rnn,
it gives: ValueError: Over-sharing: Variable BiRNN_FW/RNN/BasicLSTMCell/Linear/Matrix already exists, disallowed. Did you mean to set reuse=True in VarScope?

import tensorflow as tf
from tensorflow.models.rnn import rnn, rnn_cell
from tensorflow.python.ops.constant_op import constant
import numpy as np

class Model(object):
    def __init__(self, batch_size, len_word, num_chars, dim_embed, dim_hidden):
        self.batch_size = batch_size
        self.dim_embed = dim_embed
        self.dim_hidden = dim_hidden
        self.num_chars = num_chars
        self.len_word = len_word

        with tf.device("/cpu:0"):
            self.embedding = tf.Variable(tf.random_uniform([num_chars, dim_embed], -0.1, 0.1), name='embedding')

        self.W_emb = tf.Variable(tf.random_uniform([dim_hidden*2, dim_embed], -0.1, 0.1), name='W_emb')
        self.b_emb = tf.Variable(tf.zeros([dim_embed]), name='b_emb')
        # Both cells are constructed the same way, so their weight matrices
        # compete for one variable name (this is what triggers the error above).
        self.lstm_fw_cell = rnn_cell.BasicLSTMCell(dim_hidden)
        self.lstm_bw_cell = rnn_cell.BasicLSTMCell(dim_hidden)

    def build_model(self):
        inputs = tf.placeholder(tf.int32, [self.batch_size, self.len_word])
        input_length = tf.placeholder(tf.int64, [self.batch_size])
        lstm_state_fw = self.lstm_fw_cell.zero_state(self.batch_size, tf.float32)
        lstm_state_bw = self.lstm_bw_cell.zero_state(self.batch_size, tf.float32)

        with tf.device("/cpu:0"):
            embedded_input = tf.nn.embedding_lookup(self.embedding, tf.transpose(inputs))

        brnn_output = rnn.bidirectional_rnn(
            self.lstm_fw_cell, self.lstm_bw_cell,
            tf.unpack(embedded_input),
            sequence_length=input_length,
            initial_state_fw=lstm_state_fw,
            initial_state_bw=lstm_state_bw,
        )

        pooled_output = tf.reduce_sum( tf.pack(brnn_output), 0 )
        pooled_output = pooled_output / tf.expand_dims( tf.to_float(input_length) + 1e-6, 1)
        final_emb = tf.nn.xw_plus_b(pooled_output, self.W_emb, self.b_emb)
        final_emb = tf.nn.l2_normalize(final_emb, dim=1, epsilon=1e-7)

        return final_emb


All 3 comments

You need to specify different variable scopes for the LSTM cells.

with tf.variable_scope('forward'):
    self.lstm_fw_cell = rnn_cell.BasicLSTMCell(dim_hidden)   
with tf.variable_scope('backward'):
    self.lstm_bw_cell = rnn_cell.BasicLSTMCell(dim_hidden)

Otherwise there is a name collision (both cells try to use the "BiRNN_FW/RNN/BasicLSTMCell/Linear/Matrix" name), and TF interprets this as an intention to share the parameters across the two cells, which is not what you want. TF throws the exception because you did not explicitly tell it to reuse the variables in the second scope: with variable_scope(name, reuse=True).
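For contrast, here is a minimal sketch of what opting in to sharing looks like (assuming the variable-scope API of that era; the scope name "shared" is made up). Without the reuse_variables() call, the second get_variable raises exactly this kind of over-sharing error:

import tensorflow as tf

with tf.variable_scope("shared") as scope:
    m1 = tf.get_variable("Matrix", [4, 4])   # first call creates shared/Matrix
    scope.reuse_variables()                   # explicit opt-in, same as reuse=True
    m2 = tf.get_variable("Matrix", [4, 4])   # second call returns the existing variable

assert m1 is m2  # both names resolve to the same shared/Matrix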

Setting the variable scopes as above will create unique names for the variables:
BiRNN_FW/RNN/BasicLSTMCell/forward/Linear/Matrix
BiRNN_FW/RNN/BasicLSTMCell/backward/Linear/Matrix

Read the Sharing Variables guide for more information.

Note that this can no longer happen, since the variables are created in ...Cell.__call__, not in ...Cell.__init__, so the scope used to build the cell is not needed; bidirectional_rnn will handle the variable scoping itself, so you do not need to scope the cells yourself.
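In other words, under the newer behavior the original model reduces to something like this sketch (TF 1.x names assumed; dim_hidden, embedded_input and input_length are taken from the snippet above):

import tensorflow as tf

# No variable_scope around the constructors: no variables exist yet.
lstm_fw_cell = tf.contrib.rnn.BasicLSTMCell(dim_hidden)
lstm_bw_cell = tf.contrib.rnn.BasicLSTMCell(dim_hidden)

# bidirectional_dynamic_rnn enters its own .../fw and .../bw scopes when it
# calls the cells, so the two weight matrices get unique names automatically.
outputs, states = tf.nn.bidirectional_dynamic_rnn(
    lstm_fw_cell, lstm_bw_cell,
    inputs=embedded_input,          # assumed batch-major here: [batch, time, depth]
    sequence_length=input_length,
    dtype=tf.float32)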

I still have the same problem. Any suggestions? I already tried the approach Solomon proposed, with the same result. It does not return any tuple as a result.

((encoder_fw_outputs,
  encoder_bw_outputs),
 (encoder_fw_final_state,
  encoder_bw_final_state)) = (
    tf.nn.bidirectional_dynamic_rnn(cell_fw=encoder_cell,
                                    cell_bw=encoder_cell,
                                    inputs=encoder_inputs_embedded,
                                    sequence_length=encoder_inputs_length,
                                    dtype=tf.float64, time_major=True)
)

ValueError                                Traceback (most recent call last)
<ipython-input-…> in <module>()
     20     inputs=encoder_inputs_embedded,
     21     sequence_length=encoder_inputs_length,
---> 22     dtype=tf.float32, time_major=True)
     23 )
     24

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc in bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length, initial_state_fw, initial_state_bw, dtype, parallel_iterations, swap_memory, time_major, scope)
--> 350           time_major=time_major, scope=fw_scope)
    352   # Backward direction

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc in dynamic_rnn(cell, inputs, sequence_length, initial_state, dtype, parallel_iterations, swap_memory, time_major, scope)
--> 546       dtype=dtype)
    548   # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth].

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc in _dynamic_rnn_loop(cell, inputs, initial_state, parallel_iterations, swap_memory, sequence_length, dtype)
    711       loop_vars=(time, output_ta, state),
    712       parallel_iterations=parallel_iterations,
--> 713       swap_memory=swap_memory)
    715   # Unpack final output if not using output tuples.

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.pyc in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name)
   2603     context = WhileContext(parallel_iterations, back_prop, swap_memory, name)
   2604     ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, context)
-> 2605     result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
   2606     return result

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.pyc in BuildLoop(self, pred, body, loop_vars, shape_invariants)
   2437       original_body_result, exit_vars = self._BuildLoop(
-> 2438           pred, body, original_loop_vars, loop_vars, shape_invariants)
   2439     finally:
   2440       self.Exit()

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.pyc in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
   2386         structure=original_loop_vars,
   2387         flat_sequence=vars_for_body_with_tensor_arrays
-> 2388     body_result = body(*packed_vars_for_body)
   2389     if not nest.is_sequence(body_result):
   2390       body_result = [body_result]

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc in _time_step(time, output_ta_t, state)
--> 696           skip_conditionals=True)
    697     else:
    698       (output, new_state) = call_cell()

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc in _rnn_step(time, sequence_length, min_sequence_length, max_sequence_length, zero_output, state, call_cell, state_size, skip_conditionals)
    175   # steps.  This is faster when max_seq_len is equal to the number of unrolls
    176   # (which is typical for dynamic_rnn).
--> 177     new_output, new_state = call_cell()

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc in <lambda>()
    683     input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t)
--> 684     call_cell = lambda: cell(input_t, state)
    686     if sequence_length is not None:

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.pyc in __call__(self, inputs, state, scope)
--> 338           scope=scope)
    340         value=lstm_matrix, num_or_size_splits=4, axis=1)

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.pyc in _linear(args, output_size, bias, bias_start, scope)
    746     weights = vs.get_variable(
--> 747         "weights", [total_arg_size, output_size], dtype=dtype)

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in get_variable(name, shape, dtype, initializer, regularizer, trainable, collections, caching_device, partitioner, validate_shape, custom_getter)
    987       partitioner=partitioner, validate_shape=validate_shape,
--> 988       custom_getter=custom_getter)

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in get_variable(self, var_store, name, shape, dtype, initializer, regularizer, trainable, collections, caching_device, partitioner, validate_shape, custom_getter)
--> 890         custom_getter=custom_getter)
    892   def _get_partitioned_variable(self,

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in get_variable(self, name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape, custom_getter)
    346           reuse=reuse, trainable=trainable, collections=collections,
--> 348           validate_shape=validate_shape)

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in _true_getter(name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape)
    332           trainable=trainable, collections=collections,
--> 333           caching_device=caching_device, validate_shape=validate_shape)
    335     if custom_getter is not None:

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in _get_single_variable(self, name, shape, dtype, initializer, regularizer, partition_info, reuse, trainable, collections, caching_device, validate_shape)
    638           "Originally defined at:\n\n%s" % (
--> 639               name, "".join(traceback.format_list(tb))))

ValueError: Variable bidirectional_rnn/fw/lstm_cell/weights already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:

  File "/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py", line 747, in _linear
    "weights", [total_arg_size, output_size], dtype=dtype)
  File "/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py", line 338, in __call__
    scope=scope)
  File "<ipython-input-…>", line 24, in <module>
    time_major=True
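Judging from the "Originally defined at" frames, the conflicting variable was first created by the very same notebook line (line 24), which suggests the graph-building cell was executed twice into one default graph; the second run then tries to recreate bidirectional_rnn/fw/lstm_cell/weights. A minimal sketch of the usual workaround, under that assumption (num_units is a made-up hyperparameter; the other names come from the snippet above):

import tensorflow as tf

tf.reset_default_graph()  # drop variables created by earlier runs of this cell

num_units = 256  # assumed hyperparameter
encoder_cell = tf.contrib.rnn.LSTMCell(num_units)

(outputs, final_states) = tf.nn.bidirectional_dynamic_rnn(
    cell_fw=encoder_cell, cell_bw=encoder_cell,
    inputs=encoder_inputs_embedded,
    sequence_length=encoder_inputs_length,
    dtype=tf.float32, time_major=True)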
