Tensorflow: LSTM ์…€๋กœ ์–‘๋ฐฉํ–ฅ rnn์„ ์ดˆ๊ธฐํ™”ํ•  ๋•Œ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค.

2016년 01월 18일에 작성  ·  댓글 3개  ·  출처: tensorflow/tensorflow

bidirectional_rnn์„ ์ดˆ๊ธฐํ™”ํ•˜๋ ค๊ณ  ํ•  ๋•Œ LSTM ์…€์ด ์žˆ๋Š” tensorflow๋กœ bi-rnn ๋ชจ๋ธ์„ ๋งŒ๋“ค๊ณ  ์‹ถ์Šต๋‹ˆ๋‹ค.
๊ทธ๊ฒƒ์€ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค: ValueError: Over-sharing: Variable BiRNN_FW/RNN/BasicLSTMCell/Linear/Matrix already exists, disallowed. Did you mean to set reuse=True in VarScope?

import tensorflow as tf
from tensorflow.models.rnn import rnn, rnn_cell
from tensorflow.python.ops.constant_op import constant
import numpy as np

class Model(object):
    """Bidirectional LSTM encoder producing one L2-normalised vector per row.

    The constructor creates the embedding table, the output projection and
    the forward/backward ``BasicLSTMCell`` objects; ``build_model`` wires
    them into a graph.
    """

    def __init__(self, batch_size, len_word, num_chars, dim_embed, dim_hidden):
        """Record the hyper-parameters and create variables and cells.

        Args:
            batch_size: first dimension of the input placeholders.
            len_word: second dimension of the integer input placeholder.
            num_chars: number of rows in the embedding table.
            dim_embed: width of the embedding table and of the final output.
            dim_hidden: unit count passed to each ``BasicLSTMCell``.
        """
        self.batch_size = batch_size
        self.len_word = len_word
        self.num_chars = num_chars
        self.dim_embed = dim_embed
        self.dim_hidden = dim_hidden

        # The embedding table (and its initialiser) is pinned to the CPU.
        with tf.device("/cpu:0"):
            embedding_init = tf.random_uniform([num_chars, dim_embed], -0.1, 0.1)
            self.embedding = tf.Variable(embedding_init, name='embedding')

        # Projection from 2 * dim_hidden features down to dim_embed
        # (presumably the forward and backward outputs side by side).
        projection_init = tf.random_uniform([dim_hidden*2, dim_embed], -0.1, 0.1)
        self.W_emb = tf.Variable(projection_init, name='W_emb')
        self.b_emb = tf.Variable(tf.zeros([dim_embed]), name='b_emb')

        # One cell per direction.
        self.lstm_fw_cell = rnn_cell.BasicLSTMCell(dim_hidden)
        self.lstm_bw_cell = rnn_cell.BasicLSTMCell(dim_hidden)

    def build_model(self):
        """Build the encoder graph and return the normalised output tensor.

        Creates an int32 placeholder of shape ``[batch_size, len_word]`` and
        an int64 placeholder of shape ``[batch_size]`` for the sequence
        lengths, runs the bidirectional RNN over the embedded input, sums
        the stacked outputs over axis 0, divides by the sequence length
        (plus 1e-6), projects with ``W_emb``/``b_emb`` and L2-normalises
        along dimension 1.

        Returns:
            The L2-normalised projection tensor.
        """
        batch = self.batch_size
        char_ids = tf.placeholder(tf.int32, [batch, self.len_word])
        seq_lens = tf.placeholder(tf.int64, [batch])
        fw_initial = self.lstm_fw_cell.zero_state(batch, tf.float32)
        bw_initial = self.lstm_bw_cell.zero_state(batch, tf.float32)

        # Transpose before the lookup so that unpacking along axis 0 yields
        # one tensor per position (presumably one per time step).
        with tf.device("/cpu:0"):
            transposed_ids = tf.transpose(char_ids)
            embedded = tf.nn.embedding_lookup(self.embedding, transposed_ids)

        step_inputs = tf.unpack(embedded)
        step_outputs = rnn.bidirectional_rnn(
            self.lstm_fw_cell,
            self.lstm_bw_cell,
            step_inputs,
            initial_state_fw=fw_initial,
            initial_state_bw=bw_initial,
            sequence_length=seq_lens,
        )

        # Sum over the stacked outputs, then divide by the (float) length;
        # the 1e-6 keeps the divisor non-zero for zero-length rows.
        summed = tf.reduce_sum(tf.pack(step_outputs), 0)
        divisor = tf.expand_dims(tf.to_float(seq_lens) + 1e-6, 1)
        averaged = summed / divisor

        projected = tf.nn.xw_plus_b(averaged, self.W_emb, self.b_emb)
        return tf.nn.l2_normalize(projected, dim=1, epsilon=1e-7)

가장 유용한 댓글

두 LSTM 셀에 서로 다른 변수 범위(variable scope)를 지정해야 합니다.

# Construct the forward cell under the 'forward' variable scope and the
# backward cell under the 'backward' variable scope.
with tf.variable_scope('forward'):
    self.lstm_fw_cell = rnn_cell.BasicLSTMCell(dim_hidden)   
with tf.variable_scope('backward'):
    self.lstm_bw_cell = rnn_cell.BasicLSTMCell(dim_hidden)

๊ทธ๋ ‡์ง€ ์•Š์œผ๋ฉด ์ด๋ฆ„ ์ถฉ๋Œ(๋‘ ์…€ ๋ชจ๋‘ "BiRNN_FW/RNN/BasicLSTMCell/Linear/Matrix" ์ด๋ฆ„์„ ์‚ฌ์šฉํ•˜๋ ค๊ณ  ์‹œ๋„ํ•จ)์ด ๋ฐœ์ƒํ•˜๊ณ  tf๋Š” ์ด๋ฅผ ๋‘ ์…€์˜ ๋งค๊ฐœ๋ณ€์ˆ˜๋ฅผ ๊ณต์œ ํ•˜๋ ค๋Š” ์˜๋„์ธ ๊ฒƒ์ฒ˜๋Ÿผ ํ•ด์„ํ•ฉ๋‹ˆ๋‹ค. ๋‹น์‹ ์ด ์›ํ•˜๋Š”. ๋‘ ๋ฒˆ์งธ ๋ฒ”์œ„ with variable_scope(name, reuse=True) ์—์„œ ๋ณ€์ˆ˜๋ฅผ ์žฌ์‚ฌ์šฉํ•˜๋„๋ก ๋ช…์‹œ์ ์œผ๋กœ ์ง€์‹œํ•˜์ง€ ์•Š์•˜๊ธฐ ๋•Œ๋ฌธ์— TF๋Š” ์˜ˆ์™ธ๋ฅผ throwํ•ฉ๋‹ˆ๋‹ค.

위와 같이 변수 범위를 설정하면 변수마다 고유한 이름이 생성됩니다.
BiRNN_FW/RNN/BasicLSTMCell/forward/Linear/Matrix
BiRNN_FW/RNN/BasicLSTMCell/backward/Linear/Matrix

์ž์„ธํ•œ ๋‚ด์šฉ์€ ๊ณต์œ  ๋ณ€์ˆ˜ ๊ฐ€์ด๋“œ๋ฅผ ์ฝ์–ด๋ณด์„ธ์š”.

전체 댓글 3개

두 LSTM 셀에 서로 다른 변수 범위(variable scope)를 지정해야 합니다.

# Construct the forward cell under the 'forward' variable scope and the
# backward cell under the 'backward' variable scope.
with tf.variable_scope('forward'):
    self.lstm_fw_cell = rnn_cell.BasicLSTMCell(dim_hidden)   
with tf.variable_scope('backward'):
    self.lstm_bw_cell = rnn_cell.BasicLSTMCell(dim_hidden)

๊ทธ๋ ‡์ง€ ์•Š์œผ๋ฉด ์ด๋ฆ„ ์ถฉ๋Œ(๋‘ ์…€ ๋ชจ๋‘ "BiRNN_FW/RNN/BasicLSTMCell/Linear/Matrix" ์ด๋ฆ„์„ ์‚ฌ์šฉํ•˜๋ ค๊ณ  ์‹œ๋„ํ•จ)์ด ๋ฐœ์ƒํ•˜๊ณ  tf๋Š” ์ด๋ฅผ ๋‘ ์…€์˜ ๋งค๊ฐœ๋ณ€์ˆ˜๋ฅผ ๊ณต์œ ํ•˜๋ ค๋Š” ์˜๋„์ธ ๊ฒƒ์ฒ˜๋Ÿผ ํ•ด์„ํ•ฉ๋‹ˆ๋‹ค. ๋‹น์‹ ์ด ์›ํ•˜๋Š”. ๋‘ ๋ฒˆ์งธ ๋ฒ”์œ„ with variable_scope(name, reuse=True) ์—์„œ ๋ณ€์ˆ˜๋ฅผ ์žฌ์‚ฌ์šฉํ•˜๋„๋ก ๋ช…์‹œ์ ์œผ๋กœ ์ง€์‹œํ•˜์ง€ ์•Š์•˜๊ธฐ ๋•Œ๋ฌธ์— TF๋Š” ์˜ˆ์™ธ๋ฅผ throwํ•ฉ๋‹ˆ๋‹ค.

위와 같이 변수 범위를 설정하면 변수마다 고유한 이름이 생성됩니다.
BiRNN_FW/RNN/BasicLSTMCell/forward/Linear/Matrix
BiRNN_FW/RNN/BasicLSTMCell/backward/Linear/Matrix

์ž์„ธํ•œ ๋‚ด์šฉ์€ ๊ณต์œ  ๋ณ€์ˆ˜ ๊ฐ€์ด๋“œ๋ฅผ ์ฝ์–ด๋ณด์„ธ์š”.

๋ณ€์ˆ˜๊ฐ€ ...Cell.__call__ ๋‚ด๋ถ€๊ฐ€ ์•„๋‹ˆ๋ผ ...Cell.__init__ ๋‚ด๋ถ€์— ์ƒ์„ฑ๋˜๊ธฐ ๋•Œ๋ฌธ์— ์ด๊ฒƒ์€ ๋” ์ด์ƒ ๋ฐœ์ƒํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค. ๋”ฐ๋ผ์„œ ์…€ ๊ตฌ์„ฑ์„ ์œ„ํ•œ ๋ฒ”์œ„๊ฐ€ ํ•„์š”ํ•˜์ง€ ์•Š์œผ๋ฉฐ ๋ณ€์ˆ˜ ๋ฒ”์œ„๋ฅผ ์ฒ˜๋ฆฌํ•ฉ๋‹ˆ๋‹ค. ์ž์ฒด bidirectional_rnn ์žˆ์œผ๋ฏ€๋กœ ์ง์ ‘ ๋ฒ”์œ„๋ฅผ ์ง€์ •ํ•  ํ•„์š”๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.

์—ฌ์ „ํžˆ ๊ฐ™์€ ๋ฌธ์ œ๊ฐ€ ์žˆ์Šต๋‹ˆ๋‹ค. ์–ด๋–ค ์ œ์•ˆ? ๋‚˜๋Š” salomons๊ฐ€ ์ œ์•ˆํ•œ ์ ‘๊ทผ ๋ฐฉ์‹์„ ์‹œ๋„ํ–ˆ์ง€๋งŒ ๋™์ผํ•œ ๊ฒฐ๊ณผ๋ฅผ ์–ป์—ˆ์Šต๋‹ˆ๋‹ค. ๊ฒฐ๊ณผ์ ์œผ๋กœ ํŠœํ”Œ์„ ๋ฐ˜ํ™˜ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.
# Unpack the (outputs, final_states) pair returned by
# tf.nn.bidirectional_dynamic_rnn into forward/backward components.
# The same `encoder_cell` object is passed for both directions.
((encoder_fw_outputs,
  encoder_bw_outputs),
 (encoder_fw_final_state,
  encoder_bw_final_state)) = (
    tf.nn.bidirectional_dynamic_rnn(cell_fw=encoder_cell,
                                    cell_bw=encoder_cell,
                                    inputs=encoder_inputs_embedded,
                                    sequence_length=encoder_inputs_length,
                                    dtype=tf.float64, time_major=True)
)

ValueError Traceback (most recent call last)
~์—()
20๊ฐœ์˜ ์ž…๋ ฅ=encoder_inputs_embedded,
21 ์‹œํ€€์Šค_๊ธธ์ด=์ธ์ฝ”๋”_์ž…๋ ฅ_๊ธธ์ด,
---> 22 dtype=tf.float32, time_major=True)
23)
24

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc in bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length, initial_state_fw, initial_state_bw, dtype, parallel_iterations, swap_memory, time_major, scope)
348
349ํ™”
--> 350 time_major=time_major, ๋ฒ”์œ„=fw_scope)
351
352 # ์—ญ๋ฐฉํ–ฅ

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc in dynamic_rnn(cell, input, sequence_length, initial_state, dtype, parallel_iterations, swap_memory, time_major, scope )
544
545
--> 546 dtype=dtype)
547
548 # _dynamic_rnn_loop์˜ ์ถœ๋ ฅ์€ ํ•ญ์ƒ [์‹œ๊ฐ„, ๋ฐฐ์น˜, ๊นŠ์ด] ๋ชจ์–‘์ž…๋‹ˆ๋‹ค.

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc in _dynamic_rnn_loop(์…€, ์ž…๋ ฅ, ์ดˆ๊ธฐ ์ƒํƒœ, ๋ณ‘๋ ฌ ๋ฐ˜๋ณต, ์Šค์™‘ ๋ฉ”๋ชจ๋ฆฌ, ์‹œํ€€์Šค ๊ธธ์ด, dtype)
711
712
--> 713 swap_memory=swap_memory)
714
715 # ์ถœ๋ ฅ ํŠœํ”Œ์„ ์‚ฌ์šฉํ•˜์ง€ ์•Š๋Š” ๊ฒฝ์šฐ ์ตœ์ข… ์ถœ๋ ฅ์˜ ์••์ถ•์„ ํ’‰๋‹ˆ๋‹ค.

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.pyc in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, ์ด๋ฆ„)
2603 ์ปจํ…์ŠคํŠธ = whileContext(๋ณ‘๋ ฌ_๋ฐ˜๋ณต, back_prop, swap_memory, ์ด๋ฆ„)
2604ํ™”
-> 2605 ๊ฒฐ๊ณผ = context.BuildLoop(cond, body, loop_vars, shape_invariants)
2606 ๋ฐ˜ํ™˜ ๊ฒฐ๊ณผ
2607

BuildLoop์˜ /home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.pyc(self, pred, body, loop_vars, shape_invariants)
2436 ์ž๊ธฐ.Enter()
2437
-> 2438 pred, body, original_loop_vars, loop_vars, shape_invariants)
2439 ๋“œ๋””์–ด:
2440 ์ž๊ธฐ.์ข…๋ฃŒ()

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.pyc in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
2386 ๊ตฌ์กฐ=์›๋ž˜_๋ฃจํ”„_vars,
2387
-> 2388 body_result = body(*packed_vars_for_body)
2389 if not nest.is_sequence(body_result):
2390 ๅฏถๅบฆ =

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc _time_step(์‹œ๊ฐ„, output_ta_t, ์ƒํƒœ)
694ํ™”
695ํ™”
--> 696 skip_conditionals=์ฐธ)
697 ๊ธฐํƒ€:
698 (์ถœ๋ ฅ, new_state) = call_cell()

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc in _rnn_step(time, sequence_length, min_sequence_length, max_sequence_length, zero_output, state, call_cell, state_size, skip_conditionals )
175 # ๋‹จ๊ณ„. max_seq_len์ด ์–ธ๋กค ์ˆ˜์™€ ๊ฐ™์„ ๋•Œ ๋” ๋น ๋ฆ…๋‹ˆ๋‹ค.
176 # (dynamic_rnn์— ์ผ๋ฐ˜์ ์ž„).
--> 177 new_output, new_state = call_cell()
178ํ™”
179ํ™”

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/rnn.pyc ์ž…๋ ฅ()
682
683 ๅฏถๅบฆๅ ‚ = ๅฏถๅบฆๅ ‚
--> 684 call_cell = ๋žŒ๋‹ค: ์…€(์ž…๋ ฅ_t, ์ƒํƒœ)
685
686 sequence_length๊ฐ€ None์ด ์•„๋‹Œ ๊ฒฝ์šฐ:

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.pyc __call__(์ž์ฒด, ์ž…๋ ฅ, ์ƒํƒœ, ๋ฒ”์œ„)
336 # i = input_gate, j = new_input, f = forget_gate, o = output_gate
337
--> 338 ๋ฒ”์œ„=๋ฒ”์œ„)
339ํ™”
340

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.pyc in _linear(args, output_size, bias, bias_start, scope)
745 vs.variable_scope(scope)๋ฅผ outer_scope๋กœ ์‚ฌ์šฉ:
746 ๊ฐ€์ค‘ = vs.get_variable(
--> 747 "๊ฐ€์ค‘์น˜", [์ด ์ธ์ˆ˜_ํฌ๊ธฐ, ์ถœ๋ ฅ_ํฌ๊ธฐ], dtype=dtype)
748 ๋งŒ์•ฝ len(args) == 1:
749 = ็”ฒๅ ‚

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in get_variable(name, shape, dtype, initializer, regularizer, trainingable, ์ปฌ๋ ‰์…˜, caching_device, ํŒŒํ‹ฐ์…”๋„ˆ , validate_shape, custom_getter)
986 ์ปฌ๋ ‰์…˜=์ปฌ๋ ‰์…˜, caching_device=caching_device,
987*
--> 988 custom_getter=custom_getter)
989ํ™”
990 """%s

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in get_variable(self, var_store, name, shape, dtype, initializer, regularizer, trainingable, ์ปฌ๋ ‰์…˜ , caching_device, ํŒŒํ‹ฐ์…”๋„ˆ, validate_shape, custom_getter)
888 ์ปฌ๋ ‰์…˜=์ปฌ๋ ‰์…˜, caching_device=caching_device,
889 ํŒŒํ‹ฐ์…”๋„ˆ=ํŒŒํ‹ฐ์…”๋„ˆ, validate_shape=validate_shape,
--> 890 custom_getter=custom_getter)
891
892ํ™”

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in get_variable(self, name, shape, dtype, initializer, regularizer, ์žฌ์‚ฌ์šฉ, ํ›ˆ๋ จ ๊ฐ€๋Šฅ, ์ปฌ๋ ‰์…˜ , caching_device, ํŒŒํ‹ฐ์…”๋„ˆ, validate_shape, custom_getter)
346ํ™”
347 caching_device=caching_device, ํŒŒํ‹ฐ์…”๋„ˆ=ํŒŒํ‹ฐ์…”๋„ˆ,
--> 348 validate_shape=validate_shape)
349
350ํ™”

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in _true_getter(name, shape, dtype, initializer, regularizer, ์žฌ์‚ฌ์šฉ, ํ›ˆ๋ จ ๊ฐ€๋Šฅ, ์ปฌ๋ ‰์…˜, caching_device , ํŒŒํ‹ฐ์…”๋„ˆ, validate_shape)
331 ์ด๋‹ˆ์…œ๋ผ์ด์ €=์ด๋‹ˆ์…œ๋ผ์ด์ €, ๋ ˆ๊ทค๋Ÿฌ๋ผ์ด์ €=๋ ˆ๊ทค๋Ÿฌ๋ผ์ด์ €, ์žฌ์‚ฌ์šฉ=์žฌ์‚ฌ์šฉ,
332ํ™”
--> 333 caching_device=caching_device, validate_shape=validate_shape)
334
335 custom_getter๊ฐ€ None์ด ์•„๋‹Œ ๊ฒฝ์šฐ:

/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in _get_single_variable(self, name, shape, dtype, initializer, regularizer, partition_info, ์žฌ์‚ฌ์šฉ, ํ›ˆ๋ จ ๊ฐ€๋Šฅ , ์ปฌ๋ ‰์…˜, caching_device, validate_shape)
637 " VarScope ์—์„œ ์žฌ์‚ฌ์šฉ=True ๋ฅผ ์„ค์ •ํ•˜๋ ค๋˜ ๊ฒ๋‹ˆ๊นŒ? "
638 "์›๋ž˜ ์ •์˜๋œ ์œ„์น˜:\n\n%s" %(
--> 639 ์ด๋ฆ„, "".join(traceback.format_list(tb))))
640ํ™”
641 if not shape.is_compatible_with(found_var.get_shape()):

ValueError: Variable bidirectional_rnn/fw/lstm_cell/weights already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:

File "/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py", line 747, in _linear
"weights", [total_arg_size, output_size], dtype=dtype)
File "/home/cesar/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py", line 338, in __call__
scope=scope)
File "", line 24,
time_major=True

์ด ํŽ˜์ด์ง€๊ฐ€ ๋„์›€์ด ๋˜์—ˆ๋‚˜์š”?
0 / 5 - 0 ๋“ฑ๊ธ‰