-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from Randul-Malinhara/add-initial-files
Add the scripts, datasets
- Loading branch information
Showing
9 changed files
with
286 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
# Import libraries | ||
library(readxl) | ||
library(neuralnet) | ||
library(caTools) | ||
library(MLmetrics) | ||
library(Metrics) | ||
library(xlsx) | ||
|
||
# Pair the expected target values with the network's predictions.
#
# assumed:  the expected (actual) outputs; combined column-wise via cbind().
# mlp_test: an object with a `net.result` element holding the predictions
#           (in this script, the result of neuralnet::compute()).
# Returns the combined table with its columns named "Expected Output" and
# "NeuralNetwork Output".
predictedVsreal <- function(assumed, mlp_test) {
  network_output <- as.data.frame(mlp_test$net.result)
  setNames(
    cbind(assumed, network_output),
    c("Expected Output", "NeuralNetwork Output")
  )
}
|
||
# Evaluation helper: root mean squared error.
#
# real:      actual values.
# predicted: predicted values.
# Returns sqrt(mean((real - predicted)^2)).
rmse <- function(real, predicted) {
  squared_errors <- (real - predicted)^2
  sqrt(mean(squared_errors))
}
# Compute the error metrics for a set of predictions.
#
# real:    actual values.
# predict: predicted values.
# Returns an unnamed vector in the order c(RMSE, MAE, MAPE).
evaluationFunction <- function(real, predict) {
  rmse_mlp <- rmse(real = real, predicted = predict)
  # Metrics::mae() names its first argument `actual`; passing `real = ...`
  # stops with an "unused argument" error.
  mae_mlp <- Metrics::mae(actual = real, predicted = predict)
  mape_mlp <- MLmetrics::MAPE(y_pred = predict, y_true = real)
  c(rmse_mlp, mae_mlp, mape_mlp)
}
|
||
# Import the processed data set
powerOutage <- read_xlsx(
  "C:/Users/RANDUL/Desktop/2nd Semester of 2nd Year/5DATA001C.2 Machine Learning and Data Mining/CW/scaled_powerusage_10.xlsx"
)

# For the 9th hour and 10th hour
# (Note: run the script once per file to use the 9th-hour or 10th-hour data as input)
# powerOutage <- read_xlsx("C:/Users/RANDUL/Desktop/2nd Semester of 2nd Year/5DATA001C.2 Machine Learning and Data Mining/CW/powerUsage-scaled-09.xlsx")
# powerOutage <- read_xlsx("C:/Users/RANDUL/Desktop/2nd Semester of 2nd Year/5DATA001C.2 Machine Learning and Data Mining/CW/powerUsage-scaled-10.xlsx")

# Split the data: 430 randomly drawn rows for training, the rest for testing
set.seed(123)
splitRule <- sample.int(nrow(powerOutage), size = 430)
train <- powerOutage[splitRule, ]
test <- powerOutage[-splitRule, ]

# X values of the test data: every column except the first
x_test <- test[-1]

# Y values of the test data: the first column
y_test <- test[1]
|
||
# MLP NN - Configuration 1: five inputs, hidden layers of 4 and 3 neurons
relation1 <- as.formula("wineD_original_data~v2+v3+v4+v5+v6")
mlp1 <- neuralnet(
  formula = relation1,
  data = train,
  hidden = c(4, 3),
  stepmax = 1e+10
)
plot(mlp1)

# Predict on the X test data and score the predictions against the Y test data
y_pred1 <- neuralnet::compute(mlp1, x_test)
evaluationData1 <- predictedVsreal(y_test, y_pred1)
mlpTestResult1 <- evaluationFunction(
  real = evaluationData1[["Expected Output"]],
  predict = evaluationData1[["NeuralNetwork Output"]]
)
|
||
# MLP NN - Configuration 2: same layout as configuration 1, plus learningrate
# NOTE(review): the neuralnet documentation describes `learningrate` as used
# only by traditional backpropagation; no `algorithm` is set here, so confirm
# that this argument actually changes the training.
relation2 <- as.formula("wineD_original_data~v2+v3+v4+v5+v6")
mlp2 <- neuralnet(
  formula = relation2,
  data = train,
  hidden = c(4, 3),
  stepmax = 1e+10,
  learningrate = 0.001
)
plot(mlp2)

# Predict on the X test data and score the predictions against the Y test data
y_pred2 <- neuralnet::compute(mlp2, x_test)
evaluationData2 <- predictedVsreal(y_test, y_pred2)
mlpTestResult2 <- evaluationFunction(
  real = evaluationData2[["Expected Output"]],
  predict = evaluationData2[["NeuralNetwork Output"]]
)
|
||
# MLP NN - Configuration 3: five inputs, hidden layers of 3 and 2 neurons
relation3 <- as.formula("wineD_original_data~v2+v3+v4+v5+v6")
mlp3 <- neuralnet(
  formula = relation3,
  data = train,
  hidden = c(3, 2),
  stepmax = 1e+10
)
plot(mlp3)

# Predict on the X test data and score the predictions against the Y test data
y_pred3 <- neuralnet::compute(mlp3, x_test)
evaluationData3 <- predictedVsreal(y_test, y_pred3)
mlpTestResult3 <- evaluationFunction(
  real = evaluationData3[["Expected Output"]],
  predict = evaluationData3[["NeuralNetwork Output"]]
)
|
||
# MLP NN - Configuration 4: five inputs, hidden layers of 5 and 2 neurons,
# plus learningrate
# NOTE(review): the neuralnet documentation describes `learningrate` as used
# only by traditional backpropagation; no `algorithm` is set here, so confirm
# that this argument actually changes the training.
relation4 <- as.formula("wineD_original_data~v2+v3+v4+v5+v6")
mlp4 <- neuralnet(
  formula = relation4,
  data = train,
  hidden = c(5, 2),
  stepmax = 1e+10,
  learningrate = 0.001
)
plot(mlp4)

# Predict on the X test data and score the predictions against the Y test data
y_pred4 <- neuralnet::compute(mlp4, x_test)
evaluationData4 <- predictedVsreal(y_test, y_pred4)
mlpTestResult4 <- evaluationFunction(
  real = evaluationData4[["Expected Output"]],
  predict = evaluationData4[["NeuralNetwork Output"]]
)
|
||
# MLP NN - Configuration 5: only the first three inputs (v2, v3, v4),
# hidden layers of 4 and 3 neurons, plus learningrate
# NOTE(review): the neuralnet documentation describes `learningrate` as used
# only by traditional backpropagation; no `algorithm` is set here, so confirm
# that this argument actually changes the training.
train0 <- train[1:4]
relation5 <- as.formula("wineD_original_data~v2+v3+v4")
mlp5 <- neuralnet(
  formula = relation5,
  data = train0,
  hidden = c(4, 3),
  stepmax = 1e+10,
  learningrate = 0.001
)
plot(mlp5)

# Test inputs restricted to the same three predictors (columns 2 to 4)
x_test0 <- test[2:4]

# Predict on the reduced X test data and score against the Y test data
# (Original author's note: first try with configuration 1 and run this 98:100,
# and then 2, 3, 4, 5)
y_pred5 <- neuralnet::compute(mlp5, x_test0)
evaluationData5 <- predictedVsreal(y_test, y_pred5)
mlpTestResult5 <- evaluationFunction(
  real = evaluationData5[["Expected Output"]],
  predict = evaluationData5[["NeuralNetwork Output"]]
)
|
||
# Stack the metric vectors of the five configurations into one table:
# one row per configuration, one column per metric
comp <- rbind(
  mlpTestResult1,
  mlpTestResult2,
  mlpTestResult3,
  mlpTestResult4,
  mlpTestResult5
)
dimnames(comp) <- list(
  c("config-1", "config-2", "config-3", "config-4", "config-5"),
  c("RMSE", "MAE", "MAPE")
)
comp

# Write the comparison table to an Excel workbook
write.xlsx(
  comp,
  file = "C:/Users/RANDUL/Desktop/2nd Semester of 2nd Year/5DATA001C.2 Machine Learning and Data Mining/CW/comparison-powerusage.xlsx",
  col.names = TRUE,
  append = TRUE,
  row.names = TRUE
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
# import libraries | ||
library(readxl) | ||
library(fpc) | ||
library(NbClust) | ||
library(dplyr) | ||
library(MASS) | ||
library(caret) | ||
library(flexclust) | ||
|
||
|
||
# Load the Whitewine_v2 workbook and draw a boxplot of its columns
wineD <- read_xlsx(
  "C:/Users/RANDUL/Desktop/2nd Semester of 2nd Year/5DATA001C.2 Machine Learning and Data Mining/CW/Whitewine_v2.xlsx"
)
boxplot(wineD)
|
||
# Outlier removal
#
# Each screened column gets Tukey fences computed from the full wineD data:
# Q1 - 1.5 * IQR and Q3 + 1.5 * IQR. Rows beyond a fence are collected in
# `outliers`; `no_outliers` keeps the rows that lie within the fences of
# every screened column.
outlier_columns <- c(
  "residual sugar",
  "free sulfur dioxide",
  "total sulfur dioxide"
)

# Show the summary of the first screened column
print(summary(wineD[["residual sugar"]]))

outlier_parts <- vector("list", length(outlier_columns))
no_outliers <- wineD
for (i in seq_along(outlier_columns)) {
  column <- outlier_columns[[i]]

  # summary() reports Min, 1st Qu., Median, Mean, 3rd Qu., Max, so elements
  # 2 and 5 are the first and third quartiles.
  wineD_summary <- summary(wineD[[column]])
  interqr <- wineD_summary[[5]] - wineD_summary[[2]]

  # The bounds are always based on the wineD data, not on no_outliers
  lower_limit <- wineD_summary[[2]] - (1.5 * interqr)
  upper_limit <- wineD_summary[[5]] + (1.5 * interqr)

  # Rows strictly beyond a fence are outliers for this column
  outlier_parts[[i]] <- wineD %>%
    filter(.data[[column]] > upper_limit | .data[[column]] < lower_limit)

  # Rows exactly on a fence are not outliers, so the keep-filter is
  # inclusive; a strict comparison would drop them without recording them
  # in `outliers`.
  no_outliers <- no_outliers %>%
    filter(.data[[column]] <= upper_limit & .data[[column]] >= lower_limit)
}

# A row that is extreme in several columns appears once per column
outliers <- do.call(rbind, outlier_parts)

boxplot(no_outliers)
|
||
|
||
|
||
# Standardise every column except column 12 before clustering
wine_stand <- scale(no_outliers[-12])
summary(wine_stand)

# Run NbClust() with k-means for 2 to 8 clusters
set.seed(1234)
nc <- NbClust(wine_stand, min.nc = 2, max.nc = 8, method = "kmeans")

# Bar chart of how often each cluster count appears in the first row of Best.n
best_cluster_votes <- table(nc$Best.n[1, ])
barplot(
  best_cluster_votes,
  xlab = "Total Number of Clusters",
  ylab = "Total Number of Criterias",
  main = "Total Number of Clusters Chosen by 9 Criteria"
)
|
||
# Elbow method: summed within-cluster sum of squares for 1 to 9 clusters
ws <- vapply(
  1:9,
  function(k) sum(kmeans(wine_stand, centers = k)$withinss),
  numeric(1)
)

plot(
  x = 1:9,
  y = ws,
  type = "b",
  xlab = "Total Number of Clusters",
  ylab = "Within groups sum of squares"
)
|
||
# K-means fits for k = 2 to 5. Each section cross-tabulates wine quality
# against its own cluster assignment and draws a parallel-coordinates plot
# coloured by that same assignment. The k = 3, 4 and 5 sections previously
# printed the k = 2 table and plotted the k = 2 clusters.

# k means = 2
# `confuse` keeps the k = 2 table because the ARI evaluation below reads it.
fit.km2 <- kmeans(wine_stand, 2)
plotcluster(wine_stand, fit.km2$cluster)
confuse <- table(no_outliers$quality, fit.km2$cluster)
confuse
parcoord(wine_stand, fit.km2$cluster)

# k means = 3
fit.km3 <- kmeans(wine_stand, 3)
fit.km3
confuse3 <- table(no_outliers$quality, fit.km3$cluster)
confuse3
parcoord(wine_stand, fit.km3$cluster)

# k means = 4
fit.km4 <- kmeans(wine_stand, 4)
confuse4 <- table(no_outliers$quality, fit.km4$cluster)
confuse4
parcoord(wine_stand, fit.km4$cluster)

# k means = 5
fit.km5 <- kmeans(wine_stand, 5)
confuse5 <- table(no_outliers$quality, fit.km5$cluster)
confuse5
parcoord(wine_stand, fit.km5$cluster)

plotcluster(wine_stand, fit.km5$cluster)
|
||
# Evaluation with ARI for k = 2, computed from the `confuse` table
randIndex(confuse)

# NbClust() with Manhattan distance, k-means, 2 to 5 clusters, all indices
clusters_manhattan <- NbClust(
  wine_stand,
  distance = "manhattan",
  min.nc = 2,
  max.nc = 5,
  method = "kmeans",
  index = "all"
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# Import libraries | ||
library(tidyverse) | ||
library(readxl) | ||
library(tidymodels) | ||
library(readxl) | ||
library(caTools) | ||
library(xlsx) | ||
|
||
# --------------- PREPARATION OF DATA BEFORE APPROACH --------------- #
# Load the raw workbook
power_usage <- read_excel(
  "C:/Users/RANDUL/Desktop/2nd Semester of 2nd Year/5DATA001C.2 Machine Learning and Data Mining/CW/UoW_load.xlsx"
)
View(power_usage)

# Keep a single hourly column; the original note lists 09:00, 10:00 and 11:00
# NOTE(review): this selects `11:00`, while the save step at the end of this
# script writes to scaled_powerusage_09.xlsx - confirm the hour and the file
# name match before running.
power_usage <- power_usage[["11:00"]]
View(power_usage)
str(power_usage)
plot(power_usage)

# Build the partial autocorrelation plot
pacf(x = power_usage, plot = TRUE)
|
||
# Create lags 1 to 5 of the series.
# dplyr::lag() is called explicitly: a bare lag() only resolves to it while
# dplyr masks stats::lag(), which depends on the packages attached and the
# order they were attached in.
lag1 <- dplyr::lag(power_usage, n = 1)
lag2 <- dplyr::lag(power_usage, n = 2)
lag3 <- dplyr::lag(power_usage, n = 3)
lag4 <- dplyr::lag(power_usage, n = 4)
lag5 <- dplyr::lag(power_usage, n = 5)
power_usage <- cbind(power_usage, lag1, lag2, lag3, lag4, lag5)

# Drop the rows that contain NA and print the remaining NA count
power_usage <- na.omit(power_usage)
sum(is.na(power_usage))

# Column names expected by the model formulas: target first, then the lags
colnames(power_usage) <- c(
  "wineD_original_data",
  "v2",
  "v3",
  "v4",
  "v5",
  "v6"
)
|
||
# Scale the data
scaled_poweruse <- scale(power_usage)
str(scaled_poweruse)

# Save the processed data to an Excel file
# (one file per hour: 09:00, 10:00, 11:00)
write.xlsx(
  scaled_poweruse,
  file = "C:/Users/RANDUL/Desktop/2nd Semester of 2nd Year/5DATA001C.2 Machine Learning and Data Mining/CW/scaled_powerusage_09.xlsx",
  col.names = TRUE,
  append = TRUE,
  row.names = FALSE
)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.