import numpy as np
import matplotlib.pyplot as plt

%run import_npnet.py
npnet = import_npnet(2)


$$\frac{1}{2} (5-1)^2$$

$$\sigma(x) = \left(\frac{e^{x_1}}{\sum_{i=1}^n e^{x_i}}, \dots, \frac{e^{x_n}}{\sum_{i=1}^n e^{x_i}}\right).$$

$$\frac{1}{2} \left\| \sigma(x) - (0, 1, 0, 0, 0, 0, 0, 0, 0, 0) \right\|^2$$

$$L(p, q) = -\sum_{i=1}^n q_i \log p_i.$$

$$CE(x, e_k) = L(\sigma(x), e_k) = - \log \frac{e^{x_k}}{\sum_{i=1}^n e^{x_i}}.$$

$$s_k = \frac{e^{x_k}}{\sum_{i=1}^n e^{x_i}}.$$

$$\frac{\partial CE(x, e_k)}{\partial x_j} = -\frac{\sum_{i=1}^n e^{x_i}}{e^{x_k}} \cdot \frac{-e^{x_k}e^{x_j}}{\left(\sum_{i=1}^n e^{x_i}\right)^2} = \frac{e^{x_j}}{\sum_{i=1}^n e^{x_i}} = s_j \qquad (j \neq k).$$

$$\frac{\partial CE(x, e_k)}{\partial x_k} = -\frac{\sum_{i=1}^n e^{x_i}}{e^{x_k}} \cdot \frac{e^{x_k} \sum_{i=1}^n e^{x_i}-e^{x_k}e^{x_k}}{\left(\sum_{i=1}^n e^{x_i}\right)^2} = \frac{e^{x_k}}{\sum_{i=1}^n e^{x_i}} - 1 = s_k - 1.$$

# import
class CrossEntropy(npnet.Criterion):
    """Softmax + cross-entropy loss criterion.

    forward() computes the mean of -log softmax(output)[k] over the batch;
    backward() returns the gradient derived above: s_j for j != k and
    s_k - 1 at the label index, averaged over the batch.
    """

    def forward(self, output, target):
        """Return the mean cross-entropy of logits `output` (batch, n)
        against integer class labels `target` (batch,) or (batch, 1)."""
        # Compute sigma(x): subtract the row max for numerical stability
        # (softmax is invariant to a per-row constant shift).
        s = np.exp(output - np.max(output, axis=1, keepdims=True))
        s = s / np.sum(s, axis=1, keepdims=True)
        self.cached = s
        self.target = target.ravel()
        # Cross-entropy against a one-hot target reduces to -log s_k.
        s = s[np.arange(self.target.size), self.target]
        s = np.log(s)
        return -np.sum(s) / output.shape[0]

    def backward(self):
        """Return d(loss)/d(output) = (softmax - one_hot) / batch_size."""
        # NOTE(review): the `def backward` line was lost in extraction and is
        # reconstructed here — the body below clearly computes the gradient.
        target, cached = self.target, self.cached
        cached[np.arange(target.size), target] -= 1
        cached /= cached.shape[0]
        return cached


# Fashion MNIST

import os
from urllib.request import urlopen

url = 'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/'
folder = 'data'
files = [
    'train-images-idx3-ubyte.gz',
    'train-labels-idx1-ubyte.gz',
    't10k-images-idx3-ubyte.gz',
    't10k-labels-idx1-ubyte.gz'
]
# Download each archive into `folder`, skipping files already present.
if not os.path.exists(folder):
    os.makedirs(folder)
for fname in files:
    path = os.path.join(folder, fname)
    if os.path.exists(path):
        continue
    file_url = url + fname
    # Reconstructed body: the original `with open(...)` block was empty,
    # so nothing was ever downloaded or written.
    with urlopen(file_url) as response, open(path, 'wb') as f:
        f.write(response.read())


# import
# kind = 'train' or 't10k'

import os
import gzip
import numpy as np

def load_fashion_mnist(kind='train'):
    """Load the Fashion-MNIST split `kind` ('train' or 't10k').

    Returns (images, labels): images is a (n, 784) uint8 array, labels a
    (n,) uint8 array. Raw .gz IDX archives are parsed once and cached as
    .npy files for fast reloads.

    NOTE(review): the `def` line, the `try` body and the frombuffer calls
    were lost in extraction; reconstructed from the surviving fragments.
    """
    labels_npy = f'data/{kind}-labels-idx1-ubyte.npy'
    images_npy = f'data/{kind}-images-idx3-ubyte.npy'

    try:
        # Fast path: previously converted .npy caches.
        images = np.load(images_npy)
        labels = np.load(labels_npy)
    except Exception:
        # Slow path: parse the gzipped IDX archives, then cache them.
        labels_path = f'data/{kind}-labels-idx1-ubyte.gz'
        images_path = f'data/{kind}-images-idx3-ubyte.gz'

        with gzip.open(labels_path, 'rb') as lbpath:
            # Label file: 8-byte IDX header, then one uint8 per label.
            labels = np.frombuffer(lbpath.read(), dtype=np.uint8, offset=8)

        with gzip.open(images_path, 'rb') as imgpath:
            # Image file: 16-byte IDX header, then 28*28 uint8s per image.
            images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                                   offset=16).reshape(len(labels), 784)
        np.save(images_npy, images)
        np.save(labels_npy, labels)

    return images, labels

# Human-readable class names for Fashion-MNIST label indices 0-9.
fashion_mnist_labels = [
'T-shirt/top','Trouser', 'Pullover', 'Dress', 'Coat',
'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'
]


# Show the first r*c training images in a grid, titled with their class names.
images, labels = load_fashion_mnist(kind='train')
r = c = 5
fig, ax = plt.subplots(r, c)
# ax.flat walks the axes row-major, so position k holds image k (= i*c + j).
for k, panel in enumerate(ax.flat):
    panel.imshow(images[k].reshape((28, 28)), cmap='gray_r')
    panel.set_title(fashion_mnist_labels[labels[k]])
    panel.axis('off')
plt.show()


# Load the held-out t10k split for evaluation during training.
test_images, test_labels = load_fashion_mnist(kind='t10k')


# import
def mnist_loss_and_accuracy(model, criterion,
                            images, labels, batch_size=None):
    """Evaluate `model` on (images, labels) in mini-batches.

    Returns (loss, accuracy): the mean per-sample loss as reported by
    `criterion.forward`, and the fraction of correctly classified samples.
    `batch_size=None` evaluates everything in a single batch.
    """
    if batch_size is None:
        batch_size = len(images)
    batch_num = len(images) // batch_size
    if len(images) % batch_size != 0:
        batch_num += 1

    loss = accuracy = 0
    for i in range(batch_num):
        input_batch = images[i * batch_size: (i + 1) * batch_size]
        target_batch = labels[i * batch_size: (i + 1) * batch_size]
        output = model.forward(input_batch)
        # Weight each batch's mean loss by its size so that a smaller
        # final batch does not skew the overall mean (the original divided
        # the sum of per-batch means by batch_num, over-weighting it).
        loss += criterion.forward(output, target_batch) * len(input_batch)
        # Predicted class = argmax over the class axis.
        output = np.argmax(output, axis=1)
        accuracy += np.sum(output == target_batch)
    loss /= len(images)
    accuracy /= len(images)

    return loss, accuracy

def train_fashion_mnist(model, criterion, optim,
                        train_images, train_labels,
                        batch_size=100, epoch=5,
                        profile=False,
                        test_images=None, test_labels=None):
    """Train `model` with mini-batch gradient descent for `epoch` passes.

    Returns (train_loss, train_accuracy, test_loss, test_accuracy) — one
    entry per epoch when profile=True (the test lists are filled only when
    test_images/test_labels are ndarrays); all four lists are empty when
    profile=False.
    """
    batch_num = len(train_images) // batch_size
    if len(train_images) % batch_size != 0:
        batch_num += 1

    # Per-epoch training loss plus test loss / test accuracy histories.
    train_loss = []
    train_accuracy = []
    test_loss = []
    test_accuracy = []

    # Training loop.
    for ep in range(epoch):
        # Reshuffle the training data each epoch.
        idx = np.arange(len(train_images))
        np.random.shuffle(idx)
        images = train_images[idx]
        labels = train_labels[idx]

        for i in range(batch_num):
            # One iteration: forward pass, loss, backward pass, update.
            batch = images[i * batch_size: (i + 1) * batch_size]
            target = labels[i * batch_size: (i + 1) * batch_size]
            output = model.forward(batch)
            criterion.forward(output, target)
            # NOTE(review): reconstructed line — the original loop called
            # optim.step() without ever propagating gradients, so no
            # learning could occur; extraction likely dropped this line.
            model.backward(criterion.backward())
            optim.step()

        # Skip metric collection unless profiling was requested.
        if not profile:
            continue

        # Record training loss and accuracy for this epoch.
        loss, accuracy = mnist_loss_and_accuracy(
            model, criterion, train_images, train_labels, batch_size
        )
        train_loss.append(loss)
        train_accuracy.append(accuracy)

        # Record test metrics only when both test arrays were provided.
        if not isinstance(test_images, np.ndarray):
            continue
        if not isinstance(test_labels, np.ndarray):
            continue
        loss, accuracy = mnist_loss_and_accuracy(
            model, criterion, test_images, test_labels, batch_size
        )
        test_loss.append(loss)
        test_accuracy.append(accuracy)

    return train_loss, train_accuracy, test_loss, test_accuracy


# Hyperparameters.
lr = 1e-5
batch_size = 100
epoch = 50

# Model, criterion (loss), and optimizer: a single linear layer mapping
# 784 pixels to 10 class scores, trained with SGD.
model = npnet.Linear(784, 10)
criterion = CrossEntropy()
optim = npnet.SGD(model, lr=lr)

# Train on Fashion-MNIST, recording per-epoch train/test loss and accuracy.
train_loss, train_accuracy, test_loss, test_accuracy = train_fashion_mnist(
model=model, criterion=criterion, optim=optim,
train_images=images, train_labels=labels,
batch_size=batch_size, epoch=epoch,
test_images=test_images, test_labels=test_labels,
profile=True
)


# Plot the per-epoch train vs. test loss curves.
# `x` is also reused by the accuracy plot below.
x = np.arange(len(train_loss))
for series, color, name in ((train_loss, 'b', 'train'),
                            (test_loss, 'g', 'test')):
    plt.plot(x, series, color, label=name)
plt.title('Model Loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()


# Plot the per-epoch train vs. test accuracy curves (reuses `x` from above).
for series, color, name in ((train_accuracy, 'b', 'train'),
                            (test_accuracy, 'g', 'test')):
    plt.plot(x, series, color, label=name)
plt.title('Model Accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(loc='lower right')
plt.show()


# Compare training-loss curves across several learning rates.
# (The original loop body had lost its indentation; restored here.)
batch_size = 100
epoch = 50
lrs = [1e-4, 1e-5, 1e-6]

for lr in lrs:
    # Fresh model / criterion / optimizer for every learning rate.
    model = npnet.Linear(784, 10)
    criterion = CrossEntropy()
    optim = npnet.SGD(model, lr=lr)
    loss, *_ = train_fashion_mnist(
        model=model, criterion=criterion, optim=optim,
        train_images=images, train_labels=labels,
        profile=True, batch_size=batch_size, epoch=epoch
    )
    plt.plot(np.arange(len(loss)), loss, label=f'lr={lr:.0e}')

plt.title('Training loss with respect to learning rate')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()