- #install.packages("keras")
- #keras::install_keras(tensorflow = 'gpu')
- require(keras) # Neural Networks
- require(tidyverse) # Data cleaning / Visualization
- require(knitr) # Table printing
- require(rmarkdown) # Misc. output utilities
- require(ggridges) # Visualization
- # load data
- activityLabels <- read.table("Deep_Learning_in_R_files/HAPT Data Set/activity_labels.txt",
- col.names = c("number", "label"))
- activityLabels %>% kable(align = c("c", "l"))
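- # The table lists all 12 activity codes: numbers 1-6 are basic postures and
- # movements, while 7-12 are the postural transitions modeled below.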
- ## 1: read the labels file (activity windows per experiment and user)
- labels <- read.table("Deep_Learning_in_R_files/HAPT Data Set/RawData/labels.txt",
- col.names = c("experiment", "userId", "activity", "startPos", "endPos"))
- labels %>%
- head(50) %>%
- paged_table()
- ## 2: list the raw data files
- dataFiles <- list.files("Deep_Learning_in_R_files/HAPT Data Set/RawData")
- dataFiles %>% head()
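- # The head of dataFiles should show names like "acc_exp01_user01.txt" and
- # "gyro_exp01_user01.txt". As a quick sanity check (a minimal sketch, not in
- # the original analysis), every accelerometer file should have a matching
- # gyroscope file:
- stopifnot(sum(str_detect(dataFiles, "^acc_")) == sum(str_detect(dataFiles, "^gyro_")))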
- ## 3: build a table of file paths by experiment and user
- fileInfo <- tibble(
- filePath = dataFiles
- ) %>%
- filter(filePath != "labels.txt") %>%
- separate(filePath, sep = '_',
- into = c("type", "experiment", "userId"),
- remove = FALSE) %>%
- mutate(
- experiment = str_remove(experiment, "exp"),
- userId = str_remove_all(userId, "user|\\.txt")
- ) %>%
- spread(type, filePath)
- fileInfo %>% head() %>% kable()
- ## 4: functions to read and label each recording
- # Read contents of single file to a dataframe with accelerometer and gyro data.
- readInData <- function(experiment, userId){
- genFilePath <- function(type) {
- paste0("Deep_Learning_in_R_files/HAPT Data Set/RawData/", type, "_exp",
- experiment, "_user", userId, ".txt")
- }
-
- bind_cols(
- read.table(genFilePath("acc"), col.names = c("a_x", "a_y", "a_z")),
- read.table(genFilePath("gyro"), col.names = c("g_x", "g_y", "g_z"))
- )
- }
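- # Illustrative usage, assuming experiment "01" / user "01" exist (as the file
- # listing above suggests): the result should have six numeric columns,
- # a_x, a_y, a_z and g_x, g_y, g_z, one row per sensor reading.
- readInData("01", "01") %>% head()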
- # Function to read a given file and get the observations contained along
- # with their classes.
- loadFileData <- function(curExperiment, curUserId) {
-
- # load sensor data from file into dataframe
- allData <- readInData(curExperiment, curUserId)
-
- extractObservation <- function(startPos, endPos){
- allData[startPos:endPos,]
- }
-
- # get observation locations in this file from labels dataframe
- dataLabels <- labels %>%
- filter(userId == as.integer(curUserId),
- experiment == as.integer(curExperiment))
-
-
- # extract observations as dataframes and save as a column in dataframe.
- dataLabels %>%
- mutate(
- data = map2(startPos, endPos, extractObservation)
- ) %>%
- select(-startPos, -endPos)
- }
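- # Illustrative check, again assuming experiment "01" / user "01": the result
- # should have one row per labeled activity window in that recording, with the
- # raw readings nested in the `data` list-column.
- loadFileData("01", "01") %>% head()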
- # scan through all experiment and userId combos and gather data into a dataframe.
- allObservations <- map2_df(fileInfo$experiment, fileInfo$userId, loadFileData) %>%
- right_join(activityLabels, by = c("activity" = "number")) %>%
- rename(activityName = label)
- # cache work.
- write_rds(allObservations, "allObservations.rds")
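- # On later runs the cached copy can be reloaded instead of re-parsing the raw
- # files, e.g.:
- # allObservations <- read_rds("allObservations.rds")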
- allObservations %>% dim()
- # exploring the data
- allObservations %>%
- mutate(recording_length = map_int(data, nrow)) %>%
- ggplot(aes(x = recording_length, y = activityName)) +
- geom_density_ridges(alpha = 0.8)
- # filtering
- desiredActivities <- c(
- "STAND_TO_SIT", "SIT_TO_STAND", "SIT_TO_LIE",
- "LIE_TO_SIT", "STAND_TO_LIE", "LIE_TO_STAND"
- )
- filteredObservations <- allObservations %>%
- filter(activityName %in% desiredActivities) %>%
- mutate(observationId = 1:n())
- filteredObservations %>% paged_table()
- # split the data
- set.seed(100) # seed for reproducibility
- ## get all users
- userIds <- allObservations$userId %>% unique()
- ## randomly choose 24 (80% of 30 individuals) for training
- trainIds <- sample(userIds, size = 24)
- ## set the rest of the users to the testing set
- testIds <- setdiff(userIds, trainIds)
- ## filter data.
- trainData <- filteredObservations %>%
- filter(userId %in% trainIds)
- testData <- filteredObservations %>%
- filter(userId %in% testIds)
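- # Sanity check (a minimal sketch): the split is by user, so no userId should
- # appear in both sets.
- stopifnot(length(intersect(trainIds, testIds)) == 0)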
- # visualize
- unpackedObs <- 1:nrow(trainData) %>%
- map_df(function(rowNum){
- dataRow <- trainData[rowNum, ]
- dataRow$data[[1]] %>%
- mutate(
- activityName = dataRow$activityName,
- observationId = dataRow$observationId,
- time = 1:n() )
- }) %>%
- gather(reading, value, -time, -activityName, -observationId) %>%
- separate(reading, into = c("type", "direction"), sep = "_") %>%
- mutate(type = ifelse(type == "a", "acceleration", "gyro"))
- unpackedObs %>%
- ggplot(aes(x = time, y = value, color = direction)) +
- geom_line(alpha = 0.2) +
- geom_smooth(se = FALSE, alpha = 0.7, size = 0.5) +
- facet_grid(type ~ activityName, scales = "free_y") +
- theme_minimal() +
- theme( axis.text.x = element_blank() )
- # preprocessing
- ## 1: choose a padding length (98th percentile of recording lengths)
- padSize <- trainData$data %>%
- map_int(nrow) %>%
- quantile(probs = 0.98) %>%
- ceiling()
- padSize
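- # Using the 98th percentile rather than the maximum keeps a few unusually long
- # recordings from forcing excessive zero-padding on all the others; anything
- # longer than padSize will simply be truncated.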
- ## 2: convert the list of dataframes into a padded 3D tensor
- convertToTensor <- . %>%
- map(as.matrix) %>%
- pad_sequences(maxlen = padSize)
- trainObs <- trainData$data %>% convertToTensor()
- testObs <- testData$data %>% convertToTensor()
- dim(trainObs)
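- # The result is a 3D tensor of shape (observations, padSize, 6): one slice per
- # observation, padSize time steps, and 6 sensor channels. By default,
- # pad_sequences() pads and truncates at the start ("pre") of each sequence.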
- # one hot encoding
- oneHotClasses <- . %>%
- {. - 7} %>% # shift integers from 7-12 down to 0-5
- to_categorical() # One-hot encode
- trainY <- trainData$activity %>% oneHotClasses()
- testY <- testData$activity %>% oneHotClasses()
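- # Sanity check (not in the original): to_categorical() should produce strict
- # one-hot rows, each summing to 1 across the 6 transition classes.
- stopifnot(all(rowSums(trainY) == 1), ncol(trainY) == 6)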
- # define model
- ## define variables and constants
- input_shape <- dim(trainObs)[-1]
- num_classes <- dim(trainY)[2]
- filters <- 24 # number of convolutional filters to learn
- kernel_size <- 8 # how many time-steps each conv layer sees.
- dense_size <- 48 # size of our penultimate dense layer.
- ## Initialize model
- model <- keras_model_sequential()
- model %>%
- layer_conv_1d(
- filters = filters,
- kernel_size = kernel_size,
- input_shape = input_shape,
- padding = "valid",
- activation = "relu"
- ) %>%
- layer_batch_normalization() %>%
- layer_spatial_dropout_1d(0.15) %>%
- layer_conv_1d(
- filters = filters/2,
- kernel_size = kernel_size,
- activation = "relu"
- ) %>%
- # Apply average pooling:
- layer_global_average_pooling_1d() %>%
- layer_batch_normalization() %>%
- layer_dropout(0.2) %>%
- layer_dense(
- dense_size,
- activation = "relu"
- ) %>%
- layer_batch_normalization() %>%
- layer_dropout(0.25) %>%
- layer_dense(
- num_classes,
- activation = "softmax",
- name = "dense_output"
- )
- # print a summary of the model architecture
- summary(model)
- # compile model
- model %>% compile(
- loss = "categorical_crossentropy",
- optimizer = "rmsprop",
- metrics = "accuracy"
- )
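- # categorical_crossentropy matches the one-hot targets built above; if the
- # labels were kept as integers, sparse_categorical_crossentropy would be the
- # appropriate loss instead.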
- # train model
- trainHistory <- model %>%
- fit(
- x = trainObs, y = trainY, # training data
- epochs = 350, # number of epochs
- validation_data = list(testObs, testY), # evaluated at the end of each epoch
- callbacks = list(
- callback_model_checkpoint("best_model.h5",
- save_best_only = TRUE))) # keep only the checkpoint with the best validation loss
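- # Optional tweak (an assumption, not part of the original run): an
- # early-stopping callback could end training once validation loss stalls,
- # e.g. callback_early_stopping(monitor = "val_loss", patience = 25).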
- # evaluation
- ## dataframe mapping one-hot prediction columns back to activity labels
- oneHotToLabel <- activityLabels %>%
- mutate(number = number - 7) %>%
- filter(number >= 0) %>%
- mutate(class = paste0("V",number + 1)) %>%
- select(-number)
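- # For example, activity 7 (STAND_TO_SIT) maps to prediction column "V1":
- # predict() returns an unnamed matrix whose columns as_tibble() names V1..V6,
- # in class order 7 through 12.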
- ## Load our best model checkpoint
- bestModel <- load_model_hdf5("best_model.h5")
- tidyPredictionProbs <- bestModel %>%
- predict(testObs) %>%
- as_tibble() %>%
- mutate(obs = 1:n()) %>%
- gather(class, prob, -obs) %>%
- right_join(oneHotToLabel, by = "class")
- predictionPerformance <- tidyPredictionProbs %>%
- group_by(obs) %>%
- summarise(
- highestProb = max(prob),
- predicted = label[prob == highestProb]
- ) %>%
- mutate(
- truth = testData$activityName,
- correct = truth == predicted
- )
- ## print prediction performance
- predictionPerformance
- ## visualize error
- predictionPerformance %>%
- mutate(result = ifelse(correct, 'Correct', 'Incorrect')) %>%
- ggplot(aes(highestProb)) +
- geom_histogram(binwidth = 0.01) +
- geom_rug(alpha = 0.5) +
- facet_grid(result~.) +
- ggtitle("Probabilities associated with prediction by correctness")
- predictionPerformance %>%
- group_by(truth, predicted) %>%
- summarise(count = n()) %>%
- mutate(good = truth == predicted) %>%
- ggplot(aes(x = truth, y = predicted)) +
- geom_point(aes(size = count, color = good)) +
- geom_text(aes(label = count),
- hjust = 0, vjust = 0,
- nudge_x = 0.1, nudge_y = 0.1) +
- guides(color = "none", size = "none") +
- theme_minimal()