.Rhistory
olivetti <- olivetti_faces()
show_olivetti_face(olivetti, 2, 10)
sample <- sample.int(n = nrow(olivetti), size = floor(.75 * nrow(olivetti)), replace = FALSE)
train <- olivetti[sample, ]
test <- olivetti[-sample, ]
# transform and split train on x and y
train <- data.matrix(train)
train_x <- t(train[, -1])
train_y <- train[, 1]
train_array <- train_x
dim(train_array) <- c(64, 64, 1, ncol(train_x))
# transform and split test on x and y
test <- data.matrix(test)
test_x <- t(test[, -1])
test_y <- test[, 1]
test_array <- test_x
dim(test_array) <- c(64, 64, 1, ncol(test_x))
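# note: the R mxnet API expects input arrays in (width, height, channel, batch)
# order, so each 64x64 grayscale face becomes one slice along the 4th dimension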
# define model
data <- mx.symbol.Variable('data')
# define conv layers
conv_1 <- mx.symbol.Convolution(data = data, kernel = c(5, 5), num_filter = 20) # convolution
tanh_1 <- mx.symbol.Activation(data = conv_1, act_type = "tanh") # activation function
pool_1 <- mx.symbol.Pooling(data = tanh_1, pool_type = "max", kernel = c(2, 2), stride = c(2, 2)) # pooling
conv_2 <- mx.symbol.Convolution(data = pool_1, kernel = c(5, 5), num_filter = 50)
tanh_2 <- mx.symbol.Activation(data = conv_2, act_type = "tanh")
pool_2 <- mx.symbol.Pooling(data = tanh_2, pool_type = "max", kernel = c(2, 2), stride = c(2, 2))
# define fully connected layers
flatten <- mx.symbol.Flatten(data = pool_2)
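# the spatial dims shrink as 64 -> 60 (5x5 conv) -> 30 (2x2 pool) -> 26 -> 13,
# so flatten feeds 50 * 13 * 13 = 8450 values into fc_1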
fc_1 <- mx.symbol.FullyConnected(data = flatten, num_hidden = 500)
tanh_3 <- mx.symbol.Activation(data = fc_1, act_type = "tanh")
fc_2 <- mx.symbol.FullyConnected(data = tanh_3, num_hidden = 40)
# define whole model
graph_model <- mx.symbol.SoftmaxOutput(data = fc_2)
# plot our model
#graph.viz(symbol = symbol)
logger <- mx.metric.logger()
epoch.end.callback <- mx.callback.log.train.metric(
  period = 1, # number of batches after which the metric is evaluated
  logger = logger)
# learning model
model <- mx.model.FeedForward.create(graph_model, # our graph model
                                     X = train_array, # data
                                     y = train_y, # labels
                                     ctx = mx.cpu(), # engine
                                     num.round = 10, # num epochs
                                     array.batch.size = 64, # batch size
                                     learning.rate = 0.01,
                                     momentum = 0.9,
                                     eval.metric = mx.metric.accuracy, # our metric
                                     epoch.end.callback = mx.callback.log.train.metric(1, logger)) # log training stats
graph.viz(model$symbol)
# predict
predicted <- predict(model, test_array)
predicted
predicted_labels <- max.col(t(predicted)) - 1
# calculate accuracy (correct predictions over test-set size)
sum(diag(table(test[, 1], predicted_labels))) / nrow(test)
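# equivalent check, assuming the test labels are zero-based like the predictions:
# mean(predicted_labels == test_y)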
# learning model (retrain with a higher learning rate)
model <- mx.model.FeedForward.create(graph_model, # our graph model
                                     X = train_array, # data
                                     y = train_y, # labels
                                     ctx = mx.cpu(), # engine
                                     num.round = 10, # num epochs
                                     array.batch.size = 64, # batch size
                                     learning.rate = 0.1,
                                     momentum = 0.9,
                                     eval.metric = mx.metric.accuracy, # our metric
                                     epoch.end.callback = mx.callback.log.train.metric(1, logger)) # log training stats
# predict with the retrained model (the original session reused stale predictions here)
predicted <- predict(model, test_array)
predicted_labels <- max.col(t(predicted)) - 1
# calculate accuracy
sum(diag(table(test[, 1], predicted_labels))) / nrow(test)
data <- mx.symbol.Variable('data')
# define conv layers (ReLU activations this time)
conv_1 <- mx.symbol.Convolution(data = data, kernel = c(5, 5), num_filter = 20) # convolution
relu_1 <- mx.symbol.Activation(data = conv_1, act_type = "relu") # activation function
pool_1 <- mx.symbol.Pooling(data = relu_1, pool_type = "max", kernel = c(2, 2), stride = c(2, 2)) # pooling
conv_2 <- mx.symbol.Convolution(data = pool_1, kernel = c(5, 5), num_filter = 50)
relu_2 <- mx.symbol.Activation(data = conv_2, act_type = "relu")
pool_2 <- mx.symbol.Pooling(data = relu_2, pool_type = "max", kernel = c(2, 2), stride = c(2, 2))
# define fully connected layers
flatten <- mx.symbol.Flatten(data = pool_2)
fc_1 <- mx.symbol.FullyConnected(data = flatten, num_hidden = 500)
relu_3 <- mx.symbol.Activation(data = fc_1, act_type = "relu")
fc_2 <- mx.symbol.FullyConnected(data = relu_3, num_hidden = 40)
# define whole model
graph_model <- mx.symbol.SoftmaxOutput(data = fc_2)
# plot our model
#graph.viz(symbol = symbol)
logger <- mx.metric.logger()
epoch.end.callback <- mx.callback.log.train.metric(
  period = 1, # number of batches after which the metric is evaluated
  logger = logger)
# learning model
model <- mx.model.FeedForward.create(graph_model, # our graph model
                                     X = train_array, # data
                                     y = train_y, # labels
                                     ctx = mx.cpu(), # engine
                                     num.round = 10, # num epochs
                                     array.batch.size = 64, # batch size
                                     learning.rate = 0.1,
                                     momentum = 0.9,
                                     eval.metric = mx.metric.accuracy, # our metric
                                     epoch.end.callback = mx.callback.log.train.metric(1, logger)) # log training stats
# predict
predicted <- predict(model, test_array)
predicted_labels <- max.col(t(predicted)) - 1
# calculate accuracy
sum(diag(table(test[, 1], predicted_labels))) / nrow(test)
set.seed(100)
# load data and split
olivetti <- olivetti_faces()
show_olivetti_face(olivetti, 2, 10)
sample <- sample.int(n = nrow(olivetti), size = floor(.75 * nrow(olivetti)), replace = FALSE)
train <- olivetti[sample, ]
test <- olivetti[-sample, ]
# transform and split train on x and y
train <- data.matrix(train)
train_x <- t(train[, -1])
train_y <- train[, 1]
train_array <- train_x
dim(train_array) <- c(64, 64, 1, ncol(train_x))
# transform and split test on x and y
test <- data.matrix(test)
test_x <- t(test[, -1])
test_y <- test[, 1]
test_array <- test_x
dim(test_array) <- c(64, 64, 1, ncol(test_x))
# define model
data <- mx.symbol.Variable('data')
# define conv layers (smaller 3x3 kernels this time)
conv_1 <- mx.symbol.Convolution(data = data, kernel = c(3, 3), num_filter = 20) # convolution
relu_1 <- mx.symbol.Activation(data = conv_1, act_type = "relu") # activation function
pool_1 <- mx.symbol.Pooling(data = relu_1, pool_type = "max", kernel = c(2, 2), stride = c(2, 2)) # pooling
conv_2 <- mx.symbol.Convolution(data = pool_1, kernel = c(3, 3), num_filter = 50)
relu_2 <- mx.symbol.Activation(data = conv_2, act_type = "relu")
pool_2 <- mx.symbol.Pooling(data = relu_2, pool_type = "max", kernel = c(2, 2), stride = c(2, 2))
# define fully connected layers
flatten <- mx.symbol.Flatten(data = pool_2)
fc_1 <- mx.symbol.FullyConnected(data = flatten, num_hidden = 500)
relu_3 <- mx.symbol.Activation(data = fc_1, act_type = "relu")
fc_2 <- mx.symbol.FullyConnected(data = relu_3, num_hidden = 40)
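# with 3x3 kernels the maps shrink as 64 -> 62 -> 31 -> 29 -> 14 (assuming
# mxnet's default "valid" pooling convention), so flatten now feeds
# 50 * 14 * 14 = 9800 values into fc_1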
# define whole model
graph_model <- mx.symbol.SoftmaxOutput(data = fc_2)
# plot our model
#graph.viz(symbol = symbol)
logger <- mx.metric.logger()
epoch.end.callback <- mx.callback.log.train.metric(
  period = 1, # number of batches after which the metric is evaluated
  logger = logger)
# learning model
model <- mx.model.FeedForward.create(graph_model, # our graph model
                                     X = train_array, # data
                                     y = train_y, # labels
                                     ctx = mx.cpu(), # engine
                                     num.round = 10, # num epochs
                                     array.batch.size = 64, # batch size
                                     learning.rate = 0.1,
                                     momentum = 0.9,
                                     eval.metric = mx.metric.accuracy, # our metric
                                     epoch.end.callback = mx.callback.log.train.metric(1, logger)) # log training stats
# predict
predicted <- predict(model, test_array)
predicted_labels <- max.col(t(predicted)) - 1
# calculate accuracy
sum(diag(table(test[, 1], predicted_labels))) / nrow(test)
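# optional follow-up (not from the original session): inspect which classes
# get confused with each other
# table(true = test_y, predicted = predicted_labels)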
predicted
dump("df", stdout())
# load data and split
df <-
structure(list(name = c("acebutolol", "acebutolol_ester", "acetylsalic_acid",
"acyclovir", "alprenolol", "alprenolol ester", "aminopyrin",
"artemisinin", "artesunate", "atenolol", "betazolol ester", "betazolol_",
"bremazocine", "caffeine", "chloramphenicol", "chlorothiazide",
"chlorpromazine", "cimetidine", "clonidine", "corticosterone",
"desiprarnine", "dexamethas", "dexamethas_beta_D_glucoside",
"dexamethas_beta_D_glucuronide", "diazepam", "dopamine", "doxorubici",
"erythromycin", "estradiol", "felodipine", "ganciclovir", "griseofulvin",
"hydrochlorothiazide", "hydrocortisone", "ibuprophen", "imipramine",
"indomethacin", "labetalol", "mannitol", "meloxicam", "methanol",
"methotrexate", "methylscopolamine", "metoprolol", "nadolol",
"naproxen", "nevirapine", "nicotine", "olsalazine", "oxprenolol",
"oxprenolol ester", "phencyclidine", "Phenytoin", "pindolol",
"pirenzepine", "piroxicam", "pnu200603", "practolol", "prazocin",
"progesterone", "propranolol", "propranolo_ester", "quinidine",
"ranitidine", "salicylic acid", "scopolamine", "sucrose", "sulfasalazine",
"telmisartan", "terbutaline", "tesosterone", "timolol", "timolol_ester",
"uracil", "urea", "warfarine", "zidovudine"), log_P_eff_exp = c(-5.83,
-4.61, -5.06, -6.15, -4.62, -4.47, -4.44, -4.52, -5.4, -6.44,
-4.81, -4.52, -5.1, -4.41, -4.69, -6.72, -4.7, -5.89, -4.59,
-4.47, -4.67, -4.75, -6.54, -6.12, -4.32, -5.03, -6.8, -5.43,
-4.77, -4.64, -6.27, -4.44, -6.06, -4.66, -4.28, -4.85, -4.69,
-5.03, -6.21, -4.71, -4.58, -5.92, -6.16, -4.59, -5.41, -4.83,
-4.52, -4.71, -6.96, -4.68, -4.51, -4.61, -4.57, -4.78, -6.36,
-4.45, -6.25, -6.05, -4.36, -4.37, -4.58, -4.48, -4.69, -6.31,
-4.79, -4.93, -5.77, -6.33, -4.82, -6.38, -4.34, -4.85, -4.6,
-5.37, -5.34, -4.68, -5.16), log_D = c(-0.09, 1.59, -2.25, -1.8,
1.38, 2.78, 0.63, 2.22, -0.88, -1.81, 0.28, 0.63, 1.66, 0.02,
1.14, -1.15, 1.86, -0.36, 0.78, 1.78, 1.57, 1.89, 0.58, -1.59,
2.58, -0.8, -0.16, 1.26, 2.24, 3.48, -0.87, 2.47, -0.12, 1.48,
0.68, 2.52, 1, 1.24, -2.65, 0.03, -0.7, -2.53, -1.14, 0.51, 0.68,
0.42, 1.81, 0.41, -4.5, 0.45, 1.98, 1.31, 2.26, 0.19, -0.46,
-0.07, -4, -1.4, 1.88, 3.48, 1.55, 3.02, 2.04, -0.12, -1.44,
0.21, -3.34, -0.42, 2.41, -1.07, 3.11, 0.03, 1.74, -1.11, -1.64,
0.64, -0.58), rgyr = c(4.64, 5.12, 3.41, 3.37, 3.68, 3.84, 2.97,
2.75, 4.02, 4.58, 5.41, 5.64, 3.43, 2.47, 3.75, 3.11, 3.74, 4.26,
2.79, 3.68, 3.4, 3.6, 5.67, 5.75, 3.28, 2.67, 4.85, 4.99, 3.44,
3.39, 3.7, 3.37, 3.11, 3.72, 3.45, 3.44, 4.16, 4.61, 2.48, 3.34,
0.84, 5.33, 3.67, 4.59, 4.37, 3.38, 2.94, 2.5, 4.62, 3.63, 3.87,
2.91, 2.97, 3.71, 3.55, 3.17, 3.89, 4.02, 4.96, 3.58, 3.63, 4.13,
3.25, 5.13, 2.14, 3.63, 3.49, 5.68, 5.29, 3.15, 3.33, 4.02, 3.98,
1.84, 1.23, 3.45, 3.14), rgyr_d = c(4.51, 5.03, 3.24, 3.23, 3.69,
3.88, 2.97, 2.75, 3.62, 4.52, 5.27, 5.39, 3.38, 2.47, 3.73, 3.11,
3.69, 4.24, 2.79, 3.71, 3.42, 3.66, 5.28, 5.23, 3.28, 2.68, 4.9,
5.01, 3.44, 3.48, 3.48, 3.37, 3.11, 3.79, 3.36, 3.45, 3.16, 4.46,
2.59, 3.36, 0.84, 5.18, 3.74, 4.53, 4.1, 3.43, 2.94, 2.5, 4.37,
3.56, 3.9, 2.91, 2.97, 3.71, 3.4, 3.26, 3.79, 4.09, 4.99, 3.62,
3.53, 4.06, 3.3, 4.57, 2.14, 3.49, 3.54, 5.53, 5.01, 3.15, 3.33,
4.01, 4.13, 1.84, 1.23, 3.5, 3.13), HCPSA = c(82.88, 77.08, 79.38,
120.63, 38.92, 35.53, 20.81, 54.27, 102.05, 86.82, 43.02, 47.14,
49.56, 45.55, 113.73, 138.76, 4.6, 105.44, 30.03, 75.95, 13.8,
90.74, 163.95, 186.88, 25.93, 75.13, 186.78, 138.69, 44.34, 50.34,
139.45, 67.55, 142.85, 93.37, 39.86, 3.56, 67.13, 93.29, 127.46,
93.21, 25.64, 204.96, 51.29, 44.88, 86.73, 76.98, 36.68, 15.1,
144.08, 48.62, 49.58, 1.49, 65.63, 52.8, 59.71, 99.19, 69.89,
64.79, 86.76, 38.1, 40.42, 36.21, 43.77, 105.15, 61.71, 57.35,
187.69, 133.67, 55.48, 79.52, 42.35, 100.74, 96.25, 66.72, 82.72,
59.47, 96.33), TPSA = c(87.66, 93.73, 89.9, 114.76, 41.49, 47.56,
26.79, 53.99, 100.52, 84.58, 50.72, 56.79, 43.7, 58.44, 115.38,
118.69, 6.48, 88.89, 36.42, 74.6, 15.27, 94.83, 173.98, 191.05,
32.67, 66.48, 206.07, 193.91, 40.46, 64.63, 134.99, 71.06, 118.36,
94.83, 37.3, 6.48, 68.53, 95.58, 121.38, 99.6, 20.23, 210.54,
59.06, 50.72, 81.95, 46.53, 58.12, 16.13, 139.78, 50.72, 56.79,
3.24, 58.2, 57.28, 68.78, 99.6, 91.44, 70.59, 106.95, 34.14,
41.49, 47.56, 45.59, 86.26, 57.53, 62.3, 189.53, 141.31, 72.94,
72.72, 37.3, 79.74, 85.81, 58.2, 69.11, 63.6, 103.59), N_rotb = c(0.31,
0.29, 0.23, 0.21, 0.29, 0.27, 0.17, 0.07, 0.16, 0.29, 0.27, 0.26,
0.15, 0.12, 0.28, 0.08, 0.14, 0.33, 0.08, 0.1, 0.11, 0.13, 0.17,
0.17, 0.06, 0.23, 0.18, 0.21, 0.06, 0.22, 0.25, 0.16, 0.08, 0.12,
0.24, 0.13, 0.19, 0.24, 0.44, 0.16, 0.2, 0.26, 0.16, 0.3, 0.24,
0.19, 0.05, 0.07, 0.27, 0.31, 0.29, 0.04, 0.06, 0.23, 0.08, 0.13,
0.15, 0.29, 0.15, 0.07, 0.22, 0.22, 0.14, 0.33, 0.19, 0.15, 0.28,
0.2, 0.15, 0.29, 0.06, 0.24, 0.23, 0, 0.29, 0.15, 0.18), log_P_eff_calc = c(-5.3,
-4.89, -5.77, -5.91, -4.58, -4.39, -4.63, -4.47, -5.64, -5.85,
-5.2, -5.13, -4.57, -4.89, -5.11, -5.87, -4.38, -5.55, -4.69,
-4.78, -4.46, -4.77, -5.83, -6.55, -4.45, -5.27, -6, -5.13, -4.57,
-4.44, -5.79, -4.59, -5.62, -4.94, -4.78, -4.28, -5, -5.09, -5.87,
-5.27, -4.67, -6.79, -5.37, -4.99, -5.15, -5.09, -4.49, -4.65,
-6.97, -4.84, -4.45, -4.42, -4.6, -5.02, -5.3, -5.31, -6.37,
-5.5, -5.05, -4.54, -4.57, -4.5, -4.46, -5.6, -5.29, -5.07, -6.56,
-6.06, -4.85, -5.36, -4.53, -5.35, -4.82, -5.23, -5.29, -4.95,
-5.43), residuals = c(-0.53, 0.28, 0.71, -0.24, -0.04, -0.08,
0.19, -0.05, 0.24, -0.59, 0.39, 0.61, -0.53, 0.48, 0.42, -0.85,
-0.32, -0.34, 0.1, 0.31, -0.21, 0.02, -0.71, 0.43, 0.13, 0.24,
-0.8, -0.3, -0.2, -0.2, -0.48, 0.15, -0.44, 0.28, 0.5, -0.57,
0.31, 0.06, -0.34, 0.56, 0.09, 0.87, -0.79, 0.4, -0.26, 0.26,
-0.03, -0.06, 0.01, 0.16, -0.06, -0.19, 0.03, 0.24, -1.06, 0.86,
0.12, -0.55, 0.69, 0.17, -0.01, 0.02, -0.23, -0.71, 0.5, 0.14,
0.79, -0.27, 0.03, -1.02, 0.19, 0.5, 0.22, -0.14, -0.05, 0.27,
0.27)), row.names = c(NA, -77L), class = c("tbl_df", "tbl", "data.frame"
))
set.seed(42)
# transform and split train on x and y
train_ind <- sample(1:77, 60)
x_train <- as.matrix(df[train_ind, 2:8])
y_train <- unlist(df[train_ind, 9])
x_val <- as.matrix(df[-train_ind, 2:8])
y_val <- unlist(df[-train_ind, 9])
# define model
data <- mx.symbol.Variable("data")
fc1 <- mx.symbol.FullyConnected(data,
                                num_hidden = 1)
linreg <- mx.symbol.LinearRegressionOutput(fc1)
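# a single FullyConnected unit with LinearRegressionOutput is plain linear
# regression, y = Xw + b; a quick sanity check against the closed-form fit
# (an addition here, not from the original session):
# summary(lm(y_train ~ x_train))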
initializer <- mx.init.normal(sd = 0.1)
optimizer <- mx.opt.create("sgd",
                           learning.rate = 1e-6,
                           momentum = 0.9)
logger <- mx.metric.logger()
epoch.end.callback <- mx.callback.log.train.metric(
  period = 4, # number of batches after which the metric is evaluated
  logger = logger)
n_epoch <- 20
# plot our model
graph.viz(linreg)
# learning model
model <- mx.model.FeedForward.create(
  symbol = linreg,
  X = x_train,
  y = y_train,
  ctx = mx.cpu(),
  num.round = n_epoch,
  initializer = initializer,
  optimizer = optimizer,
  eval.data = list(data = x_val, label = y_val),
  eval.metric = mx.metric.rmse,
  array.batch.size = 15,
  epoch.end.callback = epoch.end.callback)
logger$train
logger$eval
rmse_log <- data.frame(RMSE = c(logger$train, logger$eval),
                       dataset = c(rep("train", length(logger$train)),
                                   rep("val", length(logger$eval))),
                       epoch = 1:n_epoch)
library(ggplot2)
ggplot(rmse_log, aes(epoch, RMSE,
                     group = dataset,
                     colour = dataset)) +
  geom_point() +
  geom_line()
xaringan:::inf_mr()
#install.packages("keras")
#keras::install_keras(tensorflow = 'gpu')
require(keras)
require(dplyr)
mnist <- dataset_mnist()
x_train <- mnist$train$x
y_train <- mnist$train$y
x_test <- mnist$test$x
y_test <- mnist$test$y
# flatten the 28x28 images into length-784 vectors
x_train <- array_reshape(x_train, c(nrow(x_train), 784))
x_test <- array_reshape(x_test, c(nrow(x_test), 784))
# rescale
x_train <- x_train / 255
x_test <- x_test / 255
y_train <- to_categorical(y_train, 10)
y_test <- to_categorical(y_test, 10)
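# to_categorical() one-hot encodes the labels, e.g. digit 3 becomes
# c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0)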
model <- keras_model_sequential()
model %>%
  layer_dense(units = 256, activation = 'relu', input_shape = c(784)) %>%
  layer_dropout(rate = 0.4) %>%
  layer_dense(units = 128, activation = 'relu') %>%
  layer_dropout(rate = 0.3) %>%
  layer_dense(units = 10, activation = 'softmax')
summary(model)
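# summary() should report about 235,146 trainable parameters:
# 784*256 + 256 = 200960, 256*128 + 128 = 32896, 128*10 + 10 = 1290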
model %>% compile(
  loss = 'categorical_crossentropy',
  optimizer = optimizer_rmsprop(),
  metrics = c('accuracy')
)
history <- model %>% fit(
  x_train, y_train,
  epochs = 30, batch_size = 128,
  validation_split = 0.2
)
plot(history)
model %>% evaluate(x_test, y_test)
model %>% predict_classes(x_test)
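# note: predict_classes() has since been removed from the keras R package;
# an equivalent on current versions would be
# apply(model %>% predict(x_test), 1, which.max) - 1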
#install.packages("ggridges")
#install.packages("tidyverse")
require(keras)     # Neural Networks
require(tidyverse) # Data cleaning / Visualization
require(knitr)     # Table printing
require(rmarkdown) # Misc. output utilities
require(ggridges)  # Visualization
activityLabels <- read.table("Deep_Learning_in_R_files/HAPT Data Set/activity_labels.txt",
                             col.names = c("number", "label"))
activityLabels %>% kable(align = c("c", "l"))
labels <- read.table("Deep_Learning_in_R_files/HAPT Data Set/RawData/labels.txt",
                     col.names = c("experiment", "userId", "activity", "startPos", "endPos"))
labels %>%
  head(50) %>%
  paged_table()
dataFiles <- list.files("Deep_Learning_in_R_files/HAPT Data Set/RawData")
dataFiles %>% head()
## 3
fileInfo <- data_frame(
  filePath = dataFiles
) %>%
  filter(filePath != "labels.txt") %>%
  separate(filePath, sep = '_',
           into = c("type", "experiment", "userId"),
           remove = FALSE) %>%
  mutate(
    experiment = str_remove(experiment, "exp"),
    userId = str_remove_all(userId, "user|\\.txt")
  ) %>%
  spread(type, filePath)
fileInfo %>% head() %>% kable()
## 4
# Read contents of a single file to a dataframe with accelerometer and gyro data.
readInData <- function(experiment, userId){
  genFilePath = function(type) {
    paste0("Deep_Learning_in_R_files/HAPT Data Set/RawData/", type, "_exp",
           experiment, "_user", userId, ".txt")
  }
  bind_cols(
    read.table(genFilePath("acc"), col.names = c("a_x", "a_y", "a_z")),
    read.table(genFilePath("gyro"), col.names = c("g_x", "g_y", "g_z"))
  )
}
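# illustrative call (the raw files are named like acc_exp01_user01.txt,
# so ids are passed as zero-padded strings): readInData("01", "01")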
# Function to read a given file and get the observations contained along
# with their classes.
loadFileData <- function(curExperiment, curUserId) {
  # load sensor data from file into dataframe
  allData <- readInData(curExperiment, curUserId)
  extractObservation <- function(startPos, endPos){
    allData[startPos:endPos, ]
  }
  # get observation locations in this file from labels dataframe
  dataLabels <- labels %>%
    filter(userId == as.integer(curUserId),
           experiment == as.integer(curExperiment))
  # extract observations as dataframes and save as a column in dataframe.
  dataLabels %>%
    mutate(
      data = map2(startPos, endPos, extractObservation)
    ) %>%
    select(-startPos, -endPos)
}
# scan through all experiment and userId combos and gather data into a dataframe.
allObservations <- map2_df(fileInfo$experiment, fileInfo$userId, loadFileData) %>%
  right_join(activityLabels, by = c("activity" = "number")) %>%
  rename(activityName = label)
# cache work.
write_rds(allObservations, "allObservations.rds")
allObservations %>% dim()
# exploring the data
allObservations %>%
  mutate(recording_length = map_int(data, nrow)) %>%
  ggplot(aes(x = recording_length, y = activityName)) +
  geom_density_ridges(alpha = 0.8)
# filtering
desiredActivities <- c(
  "STAND_TO_SIT", "SIT_TO_STAND", "SIT_TO_LIE",
  "LIE_TO_SIT", "STAND_TO_LIE", "LIE_TO_STAND"
)
filteredObservations <- allObservations %>%
  filter(activityName %in% desiredActivities) %>%
  mutate(observationId = 1:n())
filteredObservations %>% paged_table()
## get all users
userIds <- allObservations$userId %>% unique()
# split the data
set.seed(42) # seed for reproducibility