knitr::include_graphics("/Users/jeffreychen/Desktop/fashionmnist.png")

安裝Keras,並使用fashion_mnist的dataset


此dataset中有十種衣著的圖片,分別以0到9表示:

類別 描述 中文
0 T-shirt/top T恤/上衣
1 Trouser 褲子
2 Pullover 套頭衫
3 Dress 連衣裙
4 Coat 外套
5 Sandal 涼鞋
6 Shirt 襯衫
7 Sneaker 運動鞋
8 Bag 背包
9 Ankle boot 短靴


# Load the keras package and make sure a TensorFlow backend is present.
library(keras)
# Fix: install_keras() downloads and installs a full TensorFlow environment.
# Guard it so the (slow) installation only happens when the backend is
# actually missing, instead of on every run of this script.
if (!is_keras_available()) {
  install_keras()
}
## Using r-tensorflow conda environment for TensorFlow installation
## Determining latest release of TensorFlow...done
## Installing TensorFlow...
## 
## Installation complete.

# Fashion-MNIST: train/test splits of clothing images with labels 0-9.
fashion_mnist <- dataset_fashion_mnist()
x_train <- fashion_mnist$train$x
y_train <- fashion_mnist$train$y
x_test <- fashion_mnist$test$x
y_test <- fashion_mnist$test$y

# Flatten each 28x28 image into a 784-element feature vector.
x_train <- array_reshape(x_train, c(nrow(x_train), 784))
x_test <- array_reshape(x_test, c(nrow(x_test), 784))
# Rescale pixel intensities from [0, 255] to [0, 1].
x_train <- x_train / 255
x_test <- x_test / 255

# One-hot encode the 10 class labels for categorical crossentropy.
y_train <- to_categorical(y_train, 10)
y_test <- to_categorical(y_test, 10)

1. Try layer_dropout rate as 0.4 and 0.3


# Feed-forward classifier 784 -> 256 -> 128 -> 10 with dropout rates
# 0.4 and 0.3 after the hidden layers. Keras models are modified in
# place, so adding layers one statement at a time builds the same network
# as a single pipe chain.
model <- keras_model_sequential()
model %>% layer_dense(units = 256, activation = 'relu', input_shape = c(784))
model %>% layer_dropout(rate = 0.4)
model %>% layer_dense(units = 128, activation = 'relu')
model %>% layer_dropout(rate = 0.3)
model %>% layer_dense(units = 10, activation = 'softmax')

# Print layer shapes and parameter counts for the architecture above.
summary(model)
## ___________________________________________________________________________
## Layer (type)                     Output Shape                  Param #     
## ===========================================================================
## dense_1 (Dense)                  (None, 256)                   200960      
## ___________________________________________________________________________
## dropout_1 (Dropout)              (None, 256)                   0           
## ___________________________________________________________________________
## dense_2 (Dense)                  (None, 128)                   32896       
## ___________________________________________________________________________
## dropout_2 (Dropout)              (None, 128)                   0           
## ___________________________________________________________________________
## dense_3 (Dense)                  (None, 10)                    1290        
## ===========================================================================
## Total params: 235,146
## Trainable params: 235,146
## Non-trainable params: 0
## ___________________________________________________________________________
# Configure training: crossentropy loss for one-hot targets, RMSprop
# optimizer, and accuracy as the reported metric. compile() also modifies
# the model in place, so the pipe is not needed.
compile(
  model,
  loss = 'categorical_crossentropy',
  optimizer = optimizer_rmsprop(),
  metrics = c('accuracy')
)
# Experiment: hold out 20% of the training data for validation.
history <- fit(
  model,
  x_train, y_train,
  batch_size = 128,
  epochs = 30,
  validation_split = 0.2
)

# Training/validation loss and accuracy curves over the 30 epochs.
plot(history)

# Generalization performance on the held-out test set.
evaluate(model, x_test, y_test)
## $loss
## [1] 0.3803028
## 
## $acc
## [1] 0.8835
# Performance on the data the model was trained on, for comparison.
evaluate(model, x_train, y_train)
## $loss
## [1] 0.2441786
## 
## $acc
## [1] 0.9157
# Experiment: reduce the validation hold-out to 10%. Note this continues
# training the already-fitted model for another 30 epochs.
history <- fit(
  model,
  x_train, y_train,
  batch_size = 128,
  epochs = 30,
  validation_split = 0.1
)

# Loss/accuracy curves for this training run.
plot(history)

# Test-set performance after the additional training.
evaluate(model, x_test, y_test)
## $loss
## [1] 0.4135635
## 
## $acc
## [1] 0.8894
# Training-set performance for comparison.
evaluate(model, x_train, y_train)
## $loss
## [1] 0.2098547
## 
## $acc
## [1] 0.92845

2. Try layer_dropout rate as 0.3 and 0.2


# Second configuration: same 784 -> 256 -> 128 -> 10 architecture but with
# lighter dropout (0.3 and 0.2). Layers are appended one statement at a
# time; the model object is mutated in place.
model <- keras_model_sequential()
model %>% layer_dense(units = 256, activation = 'relu', input_shape = c(784))
model %>% layer_dropout(rate = 0.3)
model %>% layer_dense(units = 128, activation = 'relu')
model %>% layer_dropout(rate = 0.2)
model %>% layer_dense(units = 10, activation = 'softmax')

# Layer shapes and parameter counts; identical totals to the first model,
# since dropout layers have no trainable parameters.
summary(model)
## ___________________________________________________________________________
## Layer (type)                     Output Shape                  Param #     
## ===========================================================================
## dense_4 (Dense)                  (None, 256)                   200960      
## ___________________________________________________________________________
## dropout_3 (Dropout)              (None, 256)                   0           
## ___________________________________________________________________________
## dense_5 (Dense)                  (None, 128)                   32896       
## ___________________________________________________________________________
## dropout_4 (Dropout)              (None, 128)                   0           
## ___________________________________________________________________________
## dense_6 (Dense)                  (None, 10)                    1290        
## ===========================================================================
## Total params: 235,146
## Trainable params: 235,146
## Non-trainable params: 0
## ___________________________________________________________________________
# Same training configuration as before: crossentropy loss, RMSprop,
# accuracy metric.
compile(
  model,
  loss = 'categorical_crossentropy',
  optimizer = optimizer_rmsprop(),
  metrics = c('accuracy')
)
# Experiment: 20% validation hold-out with the lighter-dropout model.
history <- fit(
  model,
  x_train, y_train,
  batch_size = 128,
  epochs = 30,
  validation_split = 0.2
)

# Training/validation curves for this run.
plot(history)

# Test-set performance.
evaluate(model, x_test, y_test)
## $loss
## [1] 0.4203545
## 
## $acc
## [1] 0.8856
# Training-set performance for comparison.
evaluate(model, x_train, y_train)
## $loss
## [1] 0.2348596
## 
## $acc
## [1] 0.9204
# Experiment: 10% validation hold-out. As before, this continues training
# the already-fitted model for 30 more epochs.
history <- fit(
  model,
  x_train, y_train,
  batch_size = 128,
  epochs = 30,
  validation_split = 0.1
)

# Training/validation curves for this run.
plot(history)

# Test-set performance after the additional training.
evaluate(model, x_test, y_test)
## $loss
## [1] 0.4283115
## 
## $acc
## [1] 0.8884
# Training-set performance for comparison.
evaluate(model, x_train, y_train)
## $loss
## [1] 0.1800832
## 
## $acc
## [1] 0.93595

結語


用不同的參數,對測試的資料跑出來的準確率大約是0.88

對訓練資料重新預測的準確率大約是0.92

不同參數的差別只在於損失程度不同

validation_split用0.2的話損失程度會比較小