
Lecture 10, Session 2: RNN Language Modeling

junny1997 2021. 4. 8. 10:58

Predicting the next character

Given a 10-character window of the sentence, the network learns to predict the same window shifted one character to the right, i.e. the next character at every position.

 

import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
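# Note: tf.contrib exists only in TensorFlow 1.x; this script will not run on TF 2.x unmodified.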

sentence = ("if you want to build a ship, don't drum up people together to "
            "collect wood and don't assign them tasks and work, but rather "
            "teach them to long for the endless immensity of the sea.")

# Build the character vocabulary and the character-to-index dictionary
char_set = list(set(sentence))
char_dic = {w: i for i, w in enumerate(char_set)}
print(char_dic)
# {',': 0, 'r': 1, 'c': 2, 'n': 3, 'g': 4, '.': 5, 'o': 6, "'": 7, ' ': 8, 'm': 9, 'u': 10, 's': 11, 'h': 12,
# 'p': 13, 't': 14, 'i': 15, 'e': 16, 'd': 17, 'k': 18, 'f': 19, 'a': 20, 'l': 21, 'y': 22, 'b': 23, 'w': 24}


hidden_size = 50  # dimension of the LSTM hidden state
layer_size = 2
num_classes = len(char_set)
sequence_length = 10  # unroll and backprop through 10 characters at a time
learning_rate = 0.1

# Build the training windows
dataX = []
dataY = []
for i in range(0, len(sentence) - sequence_length):  # slide a 10-character window across the sentence
    x_str = sentence[i:i + sequence_length]
    y_str = sentence[i + 1: i + sequence_length + 1]
    print(i, x_str, '->', y_str)
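    # e.g. the first iteration prints: 0 if you wan -> f you want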

    # look up each character's index in the dictionary
    x = [char_dic[c] for c in x_str]
    y = [char_dic[c] for c in y_str]

    dataX.append(x)
    dataY.append(y)

batch_size = len(dataX)  # use the entire dataset as one batch

# each example is a sequence of sequence_length (10) character indices
X = tf.placeholder(tf.int32, [None, sequence_length])
Y = tf.placeholder(tf.int32, [None, sequence_length])

# One-hot encode the input indices
X_one_hot = tf.one_hot(X, num_classes)
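# X_one_hot shape: [None, sequence_length, num_classes] -- one one-hot vector per character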

 

Preparing the data
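
As a quick sanity check (my addition, not in the original lecture code), the index encoding can be decoded back to text using char_set:

print(''.join(char_set[idx] for idx in dataX[0]))  # prints "if you wan"
print(''.join(char_set[idx] for idx in dataY[0]))  # prints "f you want"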

 

# Create an individual LSTM cell with hidden_size units
def lstm_cell():
    cell = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)
    # cell = rnn.BasicRNNCell(hidden_size)
    return cell
# Stack the cells into a layer_size-layer RNN
multi_cells = rnn.MultiRNNCell([lstm_cell() for _ in range(layer_size)], state_is_tuple=True)

# dynamic_rnn unrolls the stacked cells over the sequence, feeding X_one_hot
outputs, _states = tf.nn.dynamic_rnn(multi_cells, X_one_hot, dtype=tf.float32)
# outputs: the hidden state at every time step, shape [batch_size, sequence_length, hidden_size]; _states: the final state of each layer

# flatten so each time step becomes one row for the fully connected layer
X_for_fc = tf.reshape(outputs, [-1, hidden_size])
# fully connected layer producing num_classes logits per time step
outputs = tf.contrib.layers.fully_connected(X_for_fc, num_classes, activation_fn=None)
# softmax_w = tf.get_variable("softmax_w",[hidden_size, num_classes])
# softmax_b = tf.get_variable("softmax_b",[num_classes])
# outputs = tf.matmul(X_for_fc, softmax_w) + softmax_b

# reshape back to [batch_size, sequence_length, num_classes] for the loss
outputs = tf.reshape(outputs, [batch_size, sequence_length, num_classes])

# weight every time step equally
weights = tf.ones([batch_size, sequence_length])

# aggregates the loss over all targets; weights could emphasize particular positions
sequence_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs, targets=Y, weights=weights)
# outputs: logits per time step (sequence_loss applies the softmax internally), e.g. [[0.1 0.2 0.7], [0.6 0.2 0.2]]
mean_loss = tf.reduce_mean(sequence_loss)
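# Roughly equivalent manual computation (a sketch of my own, assuming
# sequence_loss's default averaging flags): sparse cross-entropy per time
# step, then a weighted average over all positions.
# crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
#     labels=Y, logits=outputs)                  # [batch_size, sequence_length]
# manual_loss = tf.reduce_sum(crossent * weights) / tf.reduce_sum(weights)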
# optimize with Adam
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(mean_loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

 

Building the RNN layers, the loss, and the train op
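
For comparison, here is the same architecture in the TF 2.x Keras API (a minimal sketch of my own, not part of the lecture; the tf.contrib code above only runs on TF 1.x):

import tensorflow as tf  # TF 2.x

model = tf.keras.Sequential([
    tf.keras.layers.LSTM(hidden_size, return_sequences=True,
                         input_shape=(sequence_length, num_classes)),
    tf.keras.layers.LSTM(hidden_size, return_sequences=True),
    tf.keras.layers.Dense(num_classes),  # per-time-step logits
])
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

It would be trained with model.fit on the one-hot inputs and the integer targets (dataY), since the sparse cross-entropy loss takes integer labels per time step.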

 

# Training loop: feed the whole dataset each step and print the current predictions
for i in range(500):
    _, l, results = sess.run(
        [train_op, mean_loss, outputs], feed_dict={X: dataX, Y: dataY})
    for j, result in enumerate(results):
        index = np.argmax(result, axis=1)
        print(i, j, ''.join([char_set[t] for t in index]), l)

 

Running the training

 

# Reconstruct the sentence: print the first window's full prediction, then the last character of each subsequent window
print('========== next character prediction ==========')
results = sess.run(outputs, feed_dict={X: dataX})
for j, result in enumerate(results):
    index = np.argmax(result, axis=1)
    if j == 0:  # print the entire first window to start the sentence
        print(''.join([char_set[t] for t in index]), end='')
    else:
        print(char_set[index[-1]], end='')

 

Prediction

 

 

"if you want to build a ship, don't drum up people together to "
"collect wood and don't assign them tasks and work, but rather "
"teach them to long for the endless immensity of the sea."

 

 

Model output (the first character is mispredicted, since it is conditioned on only the single input character 'i'):

m you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.