'''
An autoencoder is a type of neural network that can be viewed as two parts: an encoder
function h = f(x) and a decoder r = g(h) that produces a reconstruction. Traditionally,
autoencoders have been used for dimensionality reduction or feature learning.
Stacked autoencoder: multiple hidden layers; the structure is usually symmetric about
the central hidden layer (the coding layer).
Denoising autoencoder: the noise can be pure Gaussian noise added to the input, or
noise that randomly switches inputs off, as with dropout.
'''
# Batch read/write: read every CSV file in base_path and write it back with an 'NG' suffix.
import os
import pandas

base_path = r'C:\Users\Administrator\Desktop\test'
# print(os.listdir(base_path))
files = [i.split(".")[0] for i in os.listdir(base_path)]  # file names without extension


class Filename(object):
    def __init__(self, file):
        self.file = file
        self.data = pandas.read_csv(os.path.join(base_path, self.file + '.csv'), encoding='gbk')

    def run(self):
        self.data.to_csv(os.path.join(base_path, self.file + 'NG.csv'), encoding='gbk')


for i in files:
    filename = Filename(i)
    filename.run()
# Stacked autoencoder
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")
tf.reset_default_graph()  # start from a clean default graph
import sys
from functools import partial
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 150 # codings
n_hidden3 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.01
l2_reg = 0.0001
X = tf.placeholder(tf.float32, shape=[None, n_inputs])
he_init = tf.contrib.layers.variance_scaling_initializer()  # He initialization
# Equivalent to:
# he_init = lambda shape, dtype=tf.float32: tf.truncated_normal(shape, 0., stddev=np.sqrt(2/shape[0]))
l2_regularizer = tf.contrib.layers.l2_regularizer(l2_reg)
my_dense_layer = partial(tf.layers.dense,
                         activation=tf.nn.elu,
                         kernel_initializer=he_init,
                         kernel_regularizer=l2_regularizer)
hidden1 = my_dense_layer(X, n_hidden1)
hidden2 = my_dense_layer(hidden1, n_hidden2)
hidden3 = my_dense_layer(hidden2, n_hidden3)
outputs = my_dense_layer(hidden3, n_outputs, activation=None)
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
loss = tf.add_n([reconstruction_loss] + reg_losses)
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 5
batch_size = 150
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        n_batches = mnist.train.num_examples // batch_size
        for iteration in range(n_batches):
            print("\r{}%".format(100 * iteration // n_batches), end="")  # progress within the epoch
            sys.stdout.flush()
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch})
        loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})
        print("\r{}".format(epoch), "Train MSE:", loss_train)
    saver.save(sess, "./my_model_all_layers.ckpt")
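# Illustrative follow-up, not part of the original script: a minimal sketch that
# restores the checkpoint saved above and reconstructs a few MNIST test digits.
# It assumes the stacked-autoencoder graph above is still the default graph;
# n_test_digits, X_test, outputs_val and codings_val are names introduced here.
n_test_digits = 2
X_test = mnist.test.images[:n_test_digits]
with tf.Session() as sess:
    saver.restore(sess, "./my_model_all_layers.ckpt")     # reload the trained weights
    outputs_val = outputs.eval(feed_dict={X: X_test})     # reconstructions, shape (n_test_digits, 784)
    codings_val = hidden2.eval(feed_dict={X: X_test})     # 150-dimensional codings from the middle layer
print("reconstructions:", outputs_val.shape, "codings:", codings_val.shape)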
# Denoising autoencoder: Gaussian noise
tf.reset_default_graph()  # build the denoising model in a fresh graph
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 150 # codings
n_hidden3 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.01
noise_level = 1.0
X = tf.placeholder(tf.float32, shape=[None, n_inputs])
X_noisy = X + noise_level * tf.random_normal(tf.shape(X))  # add Gaussian noise to the input
hidden1 = tf.layers.dense(X_noisy, n_hidden1, activation=tf.nn.relu,
                          name="hidden1")
hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu,
                          name="hidden2")
hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu,
                          name="hidden3")
outputs = tf.layers.dense(hidden3, n_outputs, name="outputs")
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X)) # MSE
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(reconstruction_loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 10
batch_size = 150
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        n_batches = mnist.train.num_examples // batch_size
        for iteration in range(n_batches):
            print("\r{}%".format(100 * iteration // n_batches), end="")
            sys.stdout.flush()
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch})
        loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})
        print("\r{}".format(epoch), "Train MSE:", loss_train)
    saver.save(sess, "./my_model_stacked_denoising_gaussian.ckpt")
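# Illustrative follow-up, not part of the original script: a minimal sketch that
# compares a noisy input with its reconstruction for one test digit. It assumes
# matplotlib is available (not imported above) and that the Gaussian-noise graph
# is still the default graph; X_sample, noisy_val and recon_val are names
# introduced here.
import matplotlib.pyplot as plt
X_sample = mnist.test.images[:1]
with tf.Session() as sess:
    saver.restore(sess, "./my_model_stacked_denoising_gaussian.ckpt")
    # X_noisy re-samples fresh Gaussian noise at evaluation time
    noisy_val, recon_val = sess.run([X_noisy, outputs], feed_dict={X: X_sample})
plt.subplot(1, 2, 1)
plt.imshow(noisy_val[0].reshape(28, 28), cmap="gray")
plt.title("noisy input")
plt.subplot(1, 2, 2)
plt.imshow(recon_val[0].reshape(28, 28), cmap="gray")
plt.title("reconstruction")
plt.show()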
# Denoising autoencoder: dropout noise
tf.reset_default_graph()  # required: the layer names "hidden1"-"hidden3" were already used in the previous graph
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 150 # codings
n_hidden3 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.01
dropout_rate = 0.3
training = tf.placeholder_with_default(False, shape=(), name='training')
X = tf.placeholder(tf.float32, shape=[None, n_inputs])
X_drop = tf.layers.dropout(X, dropout_rate, training=training)  # dropout layer that randomly switches inputs off
hidden1 = tf.layers.dense(X_drop, n_hidden1, activation=tf.nn.relu,
                          name="hidden1")
hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu,
                          name="hidden2")
hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu,
                          name="hidden3")
outputs = tf.layers.dense(hidden3, n_outputs, name="outputs")
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))  # MSE against the clean input
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(reconstruction_loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 10
batch_size = 150
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        n_batches = mnist.train.num_examples // batch_size
        for iteration in range(n_batches):
            print("\r{}%".format(100 * iteration // n_batches), end="")
            sys.stdout.flush()
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, training: True})  # enable dropout during training
        loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})
        print("\r{}".format(epoch), "Train MSE:", loss_train)
    saver.save(sess, "./my_model_stacked_denoising_dropout.ckpt")
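# Illustrative follow-up, not part of the original script: at inference time the
# `training` placeholder keeps its default value of False, so tf.layers.dropout
# passes the input through unchanged. A minimal sketch, assuming the dropout
# graph above is still the default graph; X_test_batch and test_mse are names
# introduced here.
X_test_batch = mnist.test.images[:100]
with tf.Session() as sess:
    saver.restore(sess, "./my_model_stacked_denoising_dropout.ckpt")
    # no `training: True` in the feed_dict, so dropout is disabled
    test_mse = reconstruction_loss.eval(feed_dict={X: X_test_batch})
print("Test MSE with dropout disabled:", test_mse)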