import tensorflow as tf
import random
import time
import os
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True, validation_size=5000)
# validation_size=5000: hold out 5,000 of the training images for validation (train: 55,000, test: 10,000)
# name the input/output placeholders X and Y so they are easy to look up when restoring the graph
X = tf.placeholder(tf.float32, [None, 784], name="X")
Y = tf.placeholder(tf.float32, [None, 10], name="Y")
keep_prob = tf.placeholder(tf.float32, name="keep_prob")
# keep_prob implements dropout (randomly switching nodes on and off): fed 0.8 at train time, 1.0 at eval time
# previously: W1 = tf.Variable(tf.random_normal([784, 300]))
W1 = tf.get_variable("W1", shape=[784, 300], initializer=tf.contrib.layers.xavier_initializer())
# Xavier initialization; use tf.get_variable when you want to specify the initializer explicitly
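# (Glorot/Xavier initialization scales the initial weights by the layer's fan-in
#  and fan-out, roughly variance 2 / (fan_in + fan_out), so activations neither
#  vanish nor explode across layers; random_normal's default stddev=1.0 does not.)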
b1 = tf.Variable(tf.random_normal([300]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)
L1 = tf.nn.dropout(L1, keep_prob=keep_prob)
# tf.nn.dropout applies dropout for us (and scales the kept activations by 1/keep_prob)
W2 = tf.get_variable("W2", shape=[300, 200], initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([200]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)
L2 = tf.nn.dropout(L2, keep_prob=keep_prob)
W3 = tf.get_variable("W3", shape=[200, 10], initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.nn.xw_plus_b(L2, W3, b3, name="hypothesis")
# same as tf.matmul(L2, W3) + b3; naming the op makes it easy to fetch later
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# for TensorBoard (written out later, inside the training loop)
summary_op = tf.summary.scalar("accuracy", accuracy)
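# (One scalar summary is enough here; with several summaries, fetching
#  tf.summary.merge_all() in a single run is the usual pattern.)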
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
'''
Alternative: weight decay (weight restriction). Add an L2 penalty on the
parameters to the cost so large weights are discouraged.
l2_loss = 0.0
W1 = tf.Variable(tf.random_normal([784, 256]))
b1 = tf.Variable(tf.random_normal([256]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)
l2_loss += tf.nn.l2_loss(W1)
l2_loss += tf.nn.l2_loss(b1)
W2 = tf.Variable(tf.random_normal([256, 256]))
b2 = tf.Variable(tf.random_normal([256]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)
l2_loss += tf.nn.l2_loss(W2)
l2_loss += tf.nn.l2_loss(b2)
W3 = tf.Variable(tf.random_normal([256, 10]))
b3 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L2, W3) + b3
l2_loss += tf.nn.l2_loss(W3)
l2_loss += tf.nn.l2_loss(b3)
l2_loss_lambda = 0.001
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y)) + l2_loss_lambda * l2_loss
'''
learning_rate = 0.001
# optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
# optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate).minimize(cost)
# optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate, momentum=0.9).minimize(cost)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)  # minimize() adds the backpropagation/update ops
# initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())
training_epochs = 30
batch_size = 100
# ========================================================================
timestamp = str(time.strftime('%m-%d-%H-%M-%S', time.localtime(time.time())))  # run directory name, e.g. runs/03-22-14-05-09/checkpoints/
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
train_summary_dir = os.path.join(out_dir, "summaries", "train")
train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)
val_summary_dir = os.path.join(out_dir, "summaries", "dev")
val_summary_writer = tf.summary.FileWriter(val_summary_dir, sess.graph)
checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
checkpoint_prefix = os.path.join(checkpoint_dir, "model")
# ========================================================================
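# To view the curves, point TensorBoard at the run directory from a shell,
# e.g. `tensorboard --logdir runs/`, then open http://localhost:6006.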
# create the checkpoint directory (keep at most the 3 most recent models)
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)
saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)
max_acc = 0         # best validation accuracy so far (renamed to avoid shadowing the built-in max)
dropout = 0.8       # keep probability (probability that a node is kept)
early_stopped = 0   # epoch at which the best model was saved
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)  # 55000 / 100 = 550 iterations
    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)  # shapes (100, 784), (100, 10)
        feed_dict = {X: batch_xs, Y: batch_ys, keep_prob: dropout}
        # keep_prob must be fed so dropout is active during training
        c, _, a = sess.run([cost, optimizer, summary_op], feed_dict=feed_dict)
        avg_cost += c / total_batch
    #......
    print('Epoch:', '%04d' % (epoch + 1), 'training cost =', '{:.9f}'.format(avg_cost))
    # TensorBoard: write the train summary (from the last batch of the epoch)
    # and evaluate the whole validation set in a single run
    # ========================================================================
    train_summary_writer.add_summary(a, epoch)
    val_accuracy, summaries = sess.run([accuracy, summary_op],
                                       feed_dict={X: mnist.validation.images,
                                                  Y: mnist.validation.labels,
                                                  keep_prob: 1.0})  # no dropout at evaluation time
    val_summary_writer.add_summary(summaries, epoch)
    # ========================================================================
    # early stopping bookkeeping: save a checkpoint whenever validation improves
    print('Validation Accuracy:', val_accuracy)
    if val_accuracy > max_acc:
        max_acc = val_accuracy
        early_stopped = epoch + 1
        saver.save(sess, checkpoint_prefix, global_step=early_stopped)
#......
print('Learning Finished!')
print('Validation Max Accuracy:', max_acc)
print('Early stopped time:', early_stopped)
test_accuracy = sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels, keep_prob: 1.0})
print('Latest Model Test Accuracy:', test_accuracy)
Epoch: 0030 training cost = 0.019202987
Validation Accuracy: 0.9846
Learning Finished!
Validation Max Accuracy: 0.9852
Early stopped time: 20
Latest Model Test Accuracy: 0.9824
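The test accuracy above uses the final-epoch weights. To evaluate the early-stopped (best) model instead, restore the most recent checkpoint: since a checkpoint is saved only when validation improves, the latest one is the best. A minimal sketch:

# restore the best saved model and re-evaluate on the test set
best_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
saver.restore(sess, best_ckpt)
best_test_accuracy = sess.run(accuracy, feed_dict={X: mnist.test.images,
                                                   Y: mnist.test.labels,
                                                   keep_prob: 1.0})
print('Best Model Test Accuracy:', best_test_accuracy)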
Early stopping (condensed listing: only the lines that change from the run above)
training_epochs = 100
batch_size = 100
timestamp = str(int(time.time()))  # e.g. runs/1578546654/
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
checkpoint_prefix = os.path.join(checkpoint_dir, "model")
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)
saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)
# ... inside the epoch loop, after the batch loop, as above ...
print('Epoch:', '%04d' % (epoch + 1), 'training cost =', '{:.9f}'.format(avg_cost))
val_accuracy = sess.run(accuracy, feed_dict={X: mnist.validation.images,
                                             Y: mnist.validation.labels,
                                             keep_prob: 1.0})  # keep_prob must still be fed at eval time
print('Validation Accuracy:', val_accuracy)
if val_accuracy > max_acc:
    max_acc = val_accuracy
    early_stopped = epoch + 1
    saver.save(sess, checkpoint_prefix, global_step=early_stopped)
# ... after the loop ...
print('Learning Finished!')
print('Validation Max Accuracy:', max_acc)
print('Early stopped time:', early_stopped)
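Note that the loop above always runs all training_epochs and only remembers the best model; a stricter early stop also breaks out of training once validation stops improving. A minimal sketch, with a hypothetical patience hyperparameter:

patience = 5             # hypothetical: epochs to wait without improvement
epochs_no_improve = 0
for epoch in range(training_epochs):
    # ... train one epoch and compute val_accuracy, as above ...
    if val_accuracy > max_acc:
        max_acc = val_accuracy
        early_stopped = epoch + 1
        epochs_no_improve = 0
        saver.save(sess, checkpoint_prefix, global_step=early_stopped)
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print('Stopping early at epoch', epoch + 1)
            break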