keras.R 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. #install.packages("keras")
  2. #keras::install_keras(tensorflow = 'gpu')
  3. require(keras) # Neural Networks
  4. require(tidyverse) # Data cleaning / Visualization
  5. require(knitr) # Table printing
  6. require(rmarkdown) # Misc. output utilities
  7. require(ggridges) # Visualization
  8. # load data
  9. activityLabels <- read.table("Deep_Learning_in_R_files/HAPT Data Set/activity_labels.txt",
  10. col.names = c("number", "label"))
  11. activityLabels %>% kable(align = c("c", "l"))
  12. ## 1
  13. labels <- read.table("Deep_Learning_in_R_files/HAPT Data Set/RawData/labels.txt",
  14. col.names = c("experiment", "userId", "activity", "startPos", "endPos"))
  15. labels %>%
  16. head(50) %>%
  17. paged_table()
  18. ## 2
  19. dataFiles <- list.files("Deep_Learning_in_R_files/HAPT Data Set/RawData")
  20. dataFiles %>% head()
  21. ## 3
  22. fileInfo <- data_frame(
  23. filePath = dataFiles
  24. ) %>%
  25. filter(filePath != "labels.txt") %>%
  26. separate(filePath, sep = '_',
  27. into = c("type", "experiment", "userId"),
  28. remove = FALSE) %>%
  29. mutate(
  30. experiment = str_remove(experiment, "exp"),
  31. userId = str_remove_all(userId, "user|\\.txt")
  32. ) %>%
  33. spread(type, filePath)
  34. fileInfo %>% head() %>% kable()
  35. ## 4
  36. # Read contents of single file to a dataframe with accelerometer and gyro data.
  37. readInData <- function(experiment, userId){
  38. genFilePath = function(type) {
  39. paste0("Deep_Learning_in_R_files/HAPT Data Set/RawData/", type, "_exp",
  40. experiment, "_user", userId, ".txt")
  41. }
  42. bind_cols(
  43. read.table(genFilePath("acc"), col.names = c("a_x", "a_y", "a_z")),
  44. read.table(genFilePath("gyro"), col.names = c("g_x", "g_y", "g_z"))
  45. )
  46. }
  47. # Function to read a given file and get the observations contained along
  48. # with their classes.
  49. loadFileData <- function(curExperiment, curUserId) {
  50. # load sensor data from file into dataframe
  51. allData <- readInData(curExperiment, curUserId)
  52. extractObservation <- function(startPos, endPos){
  53. allData[startPos:endPos,]
  54. }
  55. # get observation locations in this file from labels dataframe
  56. dataLabels <- labels %>%
  57. filter(userId == as.integer(curUserId),
  58. experiment == as.integer(curExperiment))
  59. # extract observations as dataframes and save as a column in dataframe.
  60. dataLabels %>%
  61. mutate(
  62. data = map2(startPos, endPos, extractObservation)
  63. ) %>%
  64. select(-startPos, -endPos)
  65. }
  66. # scan through all experiment and userId combos and gather data into a dataframe.
  67. allObservations <- map2_df(fileInfo$experiment, fileInfo$userId, loadFileData) %>%
  68. right_join(activityLabels, by = c("activity" = "number")) %>%
  69. rename(activityName = label)
  70. # cache work.
  71. write_rds(allObservations, "allObservations.rds")
  72. allObservations %>% dim()
  73. # exploring the data
  74. allObservations %>%
  75. mutate(recording_length = map_int(data,nrow)) %>%
  76. ggplot(aes(x = recording_length, y = activityName)) +
  77. geom_density_ridges(alpha = 0.8)
  78. # filtering
  79. desiredActivities <- c(
  80. "STAND_TO_SIT", "SIT_TO_STAND", "SIT_TO_LIE",
  81. "LIE_TO_SIT", "STAND_TO_LIE", "LIE_TO_STAND"
  82. )
  83. filteredObservations <- allObservations %>%
  84. filter(activityName %in% desiredActivities) %>%
  85. mutate(observationId = 1:n())
  86. filteredObservations %>% paged_table()
  87. # split the data
  88. set.seed(100) # seed for reproducibility
  89. ## get all users
  90. userIds <- allObservations$userId %>% unique()
  91. ## randomly choose 24 (80% of 30 individuals) for training
  92. trainIds <- sample(userIds, size = 24)
  93. ## set the rest of the users to the testing set
  94. testIds <- setdiff(userIds,trainIds)
  95. ## filter data.
  96. trainData <- filteredObservations %>%
  97. filter(userId %in% trainIds)
  98. testData <- filteredObservations %>%
  99. filter(userId %in% testIds)
  100. # visualize
  101. unpackedObs <- 1:nrow(trainData) %>%
  102. map_df(function(rowNum){
  103. dataRow <- trainData[rowNum, ]
  104. dataRow$data[[1]] %>%
  105. mutate(
  106. activityName = dataRow$activityName,
  107. observationId = dataRow$observationId,
  108. time = 1:n() )
  109. }) %>%
  110. gather(reading, value, -time, -activityName, -observationId) %>%
  111. separate(reading, into = c("type", "direction"), sep = "_") %>%
  112. mutate(type = ifelse(type == "a", "acceleration", "gyro"))
  113. unpackedObs %>%
  114. ggplot(aes(x = time, y = value, color = direction)) +
  115. geom_line(alpha = 0.2) +
  116. geom_smooth(se = FALSE, alpha = 0.7, size = 0.5) +
  117. facet_grid(type ~ activityName, scales = "free_y") +
  118. theme_minimal() +
  119. theme( axis.text.x = element_blank() )
  120. # preprocessing
  121. ## 1
  122. padSize <- trainData$data %>%
  123. map_int(nrow) %>%
  124. quantile(p = 0.98) %>%
  125. ceiling()
  126. padSize
  127. ## 2
  128. convertToTensor <- . %>%
  129. map(as.matrix) %>%
  130. pad_sequences(maxlen = padSize)
  131. trainObs <- trainData$data %>% convertToTensor()
  132. testObs <- testData$data %>% convertToTensor()
  133. dim(trainObs)
  134. # one hot encoding
  135. oneHotClasses <- . %>%
  136. {. - 7} %>% # bring integers down to 0-6 from 7-12
  137. to_categorical() # One-hot encode
  138. trainY <- trainData$activity %>% oneHotClasses()
  139. testY <- testData$activity %>% oneHotClasses()
  140. # define model
  141. ## define variables and const
  142. input_shape <- dim(trainObs)[-1]
  143. num_classes <- dim(trainY)[2]
  144. filters <- 24 # number of convolutional filters to learn
  145. kernel_size <- 8 # how many time-steps each conv layer sees.
  146. dense_size <- 48 # size of our penultimate dense layer.
  147. ## Initialize model
  148. model <- keras_model_sequential()
  149. model %>%
  150. layer_conv_1d(
  151. filters = filters,
  152. kernel_size = kernel_size,
  153. input_shape = input_shape,
  154. padding = "valid",
  155. activation = "relu"
  156. ) %>%
  157. layer_batch_normalization() %>%
  158. layer_spatial_dropout_1d(0.15) %>%
  159. layer_conv_1d(
  160. filters = filters/2,
  161. kernel_size = kernel_size,
  162. activation = "relu"
  163. ) %>%
  164. # Apply average pooling:
  165. layer_global_average_pooling_1d() %>%
  166. layer_batch_normalization() %>%
  167. layer_dropout(0.2) %>%
  168. layer_dense(
  169. dense_size,
  170. activation = "relu"
  171. ) %>%
  172. layer_batch_normalization() %>%
  173. layer_dropout(0.25) %>%
  174. layer_dense(
  175. num_classes,
  176. activation = "softmax",
  177. name = "dense_output"
  178. )
  179. # print graph model
  180. summary(model)
  181. # compile model
  182. model %>% compile(
  183. loss = "categorical_crossentropy",
  184. optimizer = "rmsprop",
  185. metrics = "accuracy"
  186. )
  187. # train model
  188. trainHistory <- model %>%
  189. fit(
  190. x = trainObs, y = trainY, # data
  191. epochs = 350, # num epoch
  192. validation_data = list(testObs, testY), # validation tests on each epoch
  193. callbacks = list(
  194. callback_model_checkpoint("best_model.h5",
  195. save_best_only = TRUE))) # update train history and save model
  196. # evaluation
  197. ## dataframe to get labels onto one-hot encoded prediction columns
  198. oneHotToLabel <- activityLabels %>%
  199. mutate(number = number - 7) %>%
  200. filter(number >= 0) %>%
  201. mutate(class = paste0("V",number + 1)) %>%
  202. select(-number)
  203. ## Load our best model checkpoint
  204. bestModel <- load_model_hdf5("best_model.h5")
  205. tidyPredictionProbs <- bestModel %>%
  206. predict(testObs) %>%
  207. as_data_frame() %>%
  208. mutate(obs = 1:n()) %>%
  209. gather(class, prob, -obs) %>%
  210. right_join(oneHotToLabel, by = "class")
  211. predictionPerformance <- tidyPredictionProbs %>%
  212. group_by(obs) %>%
  213. summarise(
  214. highestProb = max(prob),
  215. predicted = label[prob == highestProb]
  216. ) %>%
  217. mutate(
  218. truth = testData$activityName,
  219. correct = truth == predicted
  220. )
  221. ## print prediction
  222. predictionPerformance
  223. ## visualize error
  224. predictionPerformance %>%
  225. mutate(result = ifelse(correct, 'Correct', 'Incorrect')) %>%
  226. ggplot(aes(highestProb)) +
  227. geom_histogram(binwidth = 0.01) +
  228. geom_rug(alpha = 0.5) +
  229. facet_grid(result~.) +
  230. ggtitle("Probabilities associated with prediction by correctness")
  231. predictionPerformance %>%
  232. group_by(truth, predicted) %>%
  233. summarise(count = n()) %>%
  234. mutate(good = truth == predicted) %>%
  235. ggplot(aes(x = truth, y = predicted)) +
  236. geom_point(aes(size = count, color = good)) +
  237. geom_text(aes(label = count),
  238. hjust = 0, vjust = 0,
  239. nudge_x = 0.1, nudge_y = 0.1) +
  240. guides(color = FALSE, size = FALSE) +
  241. theme_minimal()