Commit a2eebaa0 by O'Reilly Media, Inc.

Initial commit

parents
#1. SET DIRECTORY AND FILE PATH
# Read the stock sample into a data frame. The columns are always reached
# through `sample_stocks` itself (or through ggplot's data argument), so the
# data frame is deliberately not attach()ed to the search path.
sample_stocks <- read.csv("S:/Work/Business and Client Work/Platforms/michaeljgrogan.com/Content/Courses/O Reilly/Video 3/14 Video 3 Code and Output/sample_stocks.csv")
#2. USE WITHIN GROUPS SUM OF SQUARES (WSS) TO DETERMINE NUMBER OF CLUSTERS
# Largest number of clusters tried for the elbow plot.
max_k <- 20
# Preallocate instead of growing the vector inside the loop.
wss <- numeric(max_k)
# k = 1: (n - 1) times the sum of the per-column variances.
wss[1] <- (nrow(sample_stocks) - 1) *
  sum(vapply(sample_stocks, var, numeric(1)))
# k >= 2: total within-cluster sum of squares reported by kmeans()
# (tot.withinss is sum(withinss)).
for (k in 2:max_k) {
  wss[k] <- kmeans(sample_stocks, centers = k)$tot.withinss
}
plot(seq_len(max_k), wss, type = "b",
     xlab = "Number of Clusters",
     ylab = "Within groups sum of squares")
title("Number of Clusters")
#3. K-MEANS CLUSTER ANALYSIS
fit <- kmeans(sample_stocks, 3) # 3 cluster solution
# Per-cluster mean of every column.
aggregate(sample_stocks, by = list(fit$cluster), FUN = mean)
# Append the cluster assignment as a factor under an explicit column name
# (rather than relying on data.frame() mangling `fit$cluster` into it).
sample_stocks <- data.frame(sample_stocks,
                            fit.cluster = as.factor(fit$cluster))
sample_stocks
library(ggplot2)
# Refer to the column by bare name inside aes(): ggplot resolves it in the
# `data` argument, whereas `sample_stocks$fit.cluster` bypasses that lookup
# and ends up as the legend title.
ggplot(sample_stocks,
       aes(x = dividendyield, y = returns, color = fit.cluster)) +
  geom_point() +
  ggtitle("K-Means Clustering")
\ No newline at end of file
#1. SET DIRECTORY AND FILE PATH
# Build the file path explicitly instead of changing the session's working
# directory, and do not attach() the data: every later step passes the data
# frame to the modelling function through its `data` argument.
data_dir <- "S:/Work/Business and Client Work/Platforms/michaeljgrogan.com/Content/Courses/O Reilly/Video 3/14 Video 3 Code and Output"
fullData <- read.csv(file.path(data_dir, "cars.csv"))
#2. DEFINE RESPONSE VARIABLE
# Bin CarSales into three classes. Intervals are right-closed, i.e.
# (-Inf, 1000], (1000, 24000], (24000, Inf), matching the original
# `<= 1000`, `> 1000 & <= 24000`, `> 24000` conditions. Missing CarSales
# values stay NA.
fullData$response <- cut(
  fullData$CarSales,
  breaks = c(-Inf, 1000, 24000, Inf),
  labels = c("<1000", ">1000 & <24000", ">24000")
)
str(fullData)
#3. TRAINING AND TEST DATA
# Fixed positional split: the first 770 rows fit the models, rows 771-963 are
# held back for evaluation.
train_rows <- seq(1, 770)
test_rows <- seq(771, 963)
inputData <- fullData[train_rows, ] # training data
testData <- fullData[test_rows, ] # test data
#4. CLASSIFICATION TREE
library(rpart)
# Grow a deliberately large tree (small cp) on the training rows; it is
# pruned back afterwards. The formula is passed inline so it does not shadow
# stats::formula and so the printed call shows the actual model.
dtree <- rpart(
  response ~ Age + Gender + Miles + Debt + Income,
  data = inputData,
  method = "class",
  control = rpart.control(minsplit = 30, cp = 0.001)
)
plot(dtree)
text(dtree)
summary(dtree)
# Complexity-parameter table and its cross-validation error plot.
printcp(dtree)
plotcp(dtree)
#5. PRUNED CLASSIFICATION TREE
# Prune at the cp value with the lowest cross-validated error.
best_cp <- dtree$cptable[which.min(dtree$cptable[, "xerror"]), "CP"]
pdtree <- prune(dtree, cp = best_cp)
plot(pdtree, uniform = TRUE,
     main = "Pruned Classification Tree For Sales")
text(pdtree, use.n = TRUE, all = TRUE, cex = .8)
# Score the held-out rows. Without `newdata`, predict() returns fitted values
# for the training rows, so comparing them with testData$response would pair
# unrelated observations.
out <- predict(pdtree, newdata = testData, type = "prob")
# Predicted class = column with the highest probability (first one on ties).
response_predicted <- colnames(out)[max.col(out, ties.method = "first")] # predicted
response_input <- as.character(testData$response) # actuals
# Confusion matrix: predicted classes in rows, actual classes in columns.
table(predicted = response_predicted, actual = response_input)
mean(response_input != response_predicted) # misclassification %
#6. REGRESSION TREE
# Regression (anova) tree for the raw CarSales value on the training rows.
fitreg <- rpart(
  CarSales ~ Age + Gender + Miles + Debt + Income,
  method = "anova",
  data = inputData
)
printcp(fitreg)
plotcp(fitreg)
summary(fitreg)
# rsq.rpart() draws two plots; show them side by side, then restore the
# previous layout so later plots are not squeezed into half the device.
old_par <- par(mfrow = c(1, 2))
rsq.rpart(fitreg) # cross-validation results
par(old_par)
#7. PRUNE REGRESSION TREE
# Look up the row of the cp table with the smallest cross-validated error and
# prune the regression tree at that row's complexity parameter.
reg_cp_table <- fitreg$cptable
best_row <- which.min(reg_cp_table[, "xerror"])
pfitreg <- prune(fitreg, cp = reg_cp_table[best_row, "CP"]) # from cptable
plot(
  pfitreg,
  uniform = TRUE,
  main = "Pruned Regression Tree for Sales"
)
text(pfitreg, use.n = TRUE, all = TRUE, cex = .8)
#8. RANDOM FOREST
library(randomForest)
# Random forest for CarSales on the same predictors and training rows as the
# regression tree.
fitregforest <- randomForest(
  formula = CarSales ~ Age + Gender + Miles + Debt + Income,
  data = inputData
)
print(fitregforest) # view results
importance(fitregforest) # importance of each predictor
plot(fitregforest)
\ No newline at end of file
#1. SET DIRECTORY AND FILE PATH
# Build the file path explicitly instead of changing the session's working
# directory.
data_dir <- "S:/Work/Business and Client Work/Platforms/michaeljgrogan.com/Content/Courses/O Reilly/Video 3/14 Video 3 Code and Output"
raw_cars <- read.csv(file.path(data_dir, "cars.csv"))
# Keep only the modelling columns (errors if any of them is missing).
fullData <- raw_cars[, c("CarSales", "Age", "Gender", "Miles", "Debt", "Income")]
# Gender must be numeric for the min-max scaling below. as.numeric() on a
# character column yields NA, so non-numeric values are converted via their
# factor level codes; an already numeric column is left untouched.
# NOTE(review): the encoding of Gender in cars.csv is not visible here - confirm.
if (!is.numeric(fullData$Gender)) {
  fullData$Gender <- as.numeric(as.factor(fullData$Gender))
}
# NOTE(review): a single attach() is kept so that any code reading these
# columns unqualified keeps working; prefer fullData$<column> in new code.
attach(fullData)
#2. MAX-MIN NORMALIZATION
# Rescale a numeric vector linearly so its minimum maps to 0 and its maximum
# to 1.
#   x: numeric vector.
# Returns a numeric vector of the same length as x. A constant vector (zero
# range) would divide by zero and produce NaN, so it is mapped to all zeros.
normalize <- function(x) {
  rng <- range(x)
  span <- rng[2] - rng[1]
  # isTRUE() keeps an NA/NaN span on the normal path, where it propagates
  # exactly as a plain (x - min) / (max - min) would.
  if (isTRUE(span == 0)) {
    return(rep(0, length(x)))
  }
  (x - rng[1]) / span
}
# Apply normalize() to every column and reassemble the result as a data frame.
scaled_columns <- lapply(fullData, normalize)
maxmindf <- as.data.frame(scaled_columns)
#3.TRAINING AND TEST DATA
# Fixed positional split of the scaled data: rows 1-770 train the network,
# rows 771-963 are held back for validation.
trainset <- maxmindf[seq(1, 770), ]
testset <- maxmindf[seq(771, 963), ]
#4. NEURAL NETWORK
library(neuralnet)
# Two hidden layers (5 and 2 units) with a linear output unit, trained on the
# scaled training rows.
nn <- neuralnet(
  CarSales ~ Age + Gender + Miles + Debt + Income,
  data = trainset,
  hidden = c(5, 2),
  linear.output = TRUE,
  threshold = 0.01
)
nn$result.matrix
plot(nn)
# Predictor columns of the held-out rows, in the order used by the formula.
predictor_cols <- c("Age", "Gender", "Miles", "Debt", "Income")
temp_test <- testset[, predictor_cols]
head(temp_test)
# Run the held-out predictors through the trained network.
nn.results <- compute(nn, temp_test)
#5. MODEL VALIDATION
# Scaled actual vs. predicted CarSales for the held-out rows.
results <- data.frame(actual = testset$CarSales, prediction = nn.results$net.result)
results
# Undo the min-max scaling using the unscaled CarSales column of fullData
# (read explicitly rather than through the search path).
sales_min <- min(fullData$CarSales)
sales_span <- diff(range(fullData$CarSales))
predicted <- results$prediction * sales_span + sales_min
actual <- results$actual * sales_span + sales_min
# Relative error per observation.
# NOTE(review): rows where actual is 0 give an infinite or NaN deviation.
deviation <- (actual - predicted) / actual
comparison <- data.frame(predicted, actual, deviation)
# Take absolute values BEFORE averaging: averaging the signed deviations lets
# over- and under-predictions cancel out and overstates accuracy.
accuracy <- 1 - mean(abs(deviation))
accuracy
\ No newline at end of file
returns,dividendyield
-19,0
-13,0
-14,0
-9,0
-19,0
-10,0
-20,0
-11,0
-12,0
-10,0
-13,0
-14,0
-14,0
-12,0
-19,0
-10,0
-11,0
-11,0
-13,0
-11,0
-9,0
-16,0
-9,0
-18,0
-19,0
-19,0.1
-13,0.1
-12,0.1
-8,0.1
-17,0.1
-13,0.1
-9,0.1
-17,0.1
-8,0.1
-20,0.1
-17,0.1
-19,0.1
-11,0.1
-8,0.2
-13,0.2
-17,0.2
-8,0.2
-8,0.2
-20,0.2
-13,0.2
-18,0.2
-16,0.2
-11,0.2
-11,0.2
-10,0.2
-8,0.2
-14,0.2
-20,0.2
-11,0.2
-10,0.2
-20,0.2
-10,0.3
-9,0.3
-10,0.3
-13,0.3
-17,0.3
-10,0.3
-15,0.3
-10,0.3
-13,0.3
-8,0.3
-11,0.3
-9,0.3
-14,0.3
-17,0.3
-16,0.3
-18,0.3
-10,0.3
-12,0.3
-11,0.3
-11,0.4
-17,0.4
-10,0.4
-11,0.4
-8,0.4
-11,0.4
-11,0.4
-17,0.4
-15,0.4
-20,0.4
-16,0.4
-13,0.4
-13,0.4
-17,0.4
-11,0.4
-11,0.4
-19,0.4
-20,0.4
-13,0.4
-17,0.4
-10,0.4
-13,0.4
-13,0.4
-13,0.5
-10,0.5
-16,0.5
-19,0.5
-15,0.5
-13,0.5
-18,0.5
-20,0.5
-9,0.5
-10,0.5
-10,0.5
-19,0.5
-16,0.5
-15,0.5
-12,0.5
-9,0.5
-13,0.5
-20,0.5
-9,0.6
-19,0.6
-18,0.6
-18,0.6
-17,0.6
-16,0.6
-11,0.6
-16,0.6
-17,0.6
-10,0.6
-16,0.6
-19,0.6
-8,0.6
-14,0.6
-18,0.6
-16,0.6
-14,0.6
-16,0.6
-20,0.6
-15,0.7
-20,0.7
-12,0.7
-19,0.7
-13,0.7
-15,0.7
-14,0.7
-11,0.7
-14,0.7
-19,0.7
-14,0.7
-20,0.7
-14,0.7
-16,0.7
-19,0.7
-13,0.7
-19,0.7
-13,0.7
-8,0.7
-20,0.7
-20,0.7
-14,0.7
-15,0.7
-19,0.7
-19,0.7
-19,0.8
-9,0.8
-17,0.8
-15,0.8
-13,0.8
-15,0.8
-12,0.8
-20,0.8
-13,0.8
-13,0.8
-17,0.8
-18,0.8
-19,0.8
-12,0.8
-15,0.8
-10,0.8
-12,0.8
-19,0.8
-20,0.8
-10,0.9
-16,0.9
-11,0.9
-15,0.9
-19,0.9
-10,0.9
-11,0.9
-13,0.9
-12,0.9
-10,0.9
-12,0.9
-10,0.9
-15,0.9
-12,0.9
-15,0.9
-16,0.9
-13,0.9
-15,0.9
-15,0.9
-12,0.9
14,2
10,2
7,2
10,2
14,2
14,2
12,2
7,2
14,2
6,2
10,2
12,2
9,2
8,2
7,2.1
11,2.1
8,2.1
8,2.1
11,2.1
7,2.1
6,2.1
11,2.1
6,2.1
10,2.1
16,2.1
11,2.1
10,2.1
15,2.1
16,2.1
10,2.1
13,2.1
7,2.1
15,2.1
7,2.1
14,2.1
14,2.1
7,2.1
6,2.1
7,2.2
5,2.2
9,2.2
5,2.2
10,2.2
14,2.2
10,2.2
10,2.2
5,2.2
16,2.2
16,2.2
13,2.2
16,2.2
7,2.2
14,2.2
16,2.2
5,2.2
12,2.2
16,2.2
14,2.2
11,2.2
11,2.2
9,2.2
10,2.3
16,2.3
7,2.3
6,2.3
15,2.3
9,2.3
13,2.3
5,2.3
15,2.3
6,2.3
7,2.3
13,2.3
15,2.3
6,2.3
11,2.3
7,2.3
6,2.4
16,2.4
12,2.4
8,2.4
7,2.4
10,2.4
9,2.4
9,2.4
16,2.4
7,2.4
12,2.4
15,2.4
11,2.4
11,2.4
13,2.4
12,2.4
12,2.4
8,2.4
10,2.4
6,2.5
14,2.5
12,2.5
14,2.5
12,2.5
6,2.5
7,2.5
15,2.5
6,2.5
15,2.5
7,2.5
15,2.5
9,2.5
10,2.5
6,2.5
6,2.5
15,2.5
10,2.5
7,2.5
14,2.5
14,2.5
11,2.5
9,2.5
15,2.5
10,2.6
12,2.6
5,2.6
12,2.6
8,2.6
13,2.6
10,2.6
12,2.6
16,2.6
6,2.6
6,2.6
10,2.6
7,2.6
12,2.6
13,2.6
6,2.7
10,2.7
7,2.7
7,2.7
11,2.7
12,2.7
5,2.7
9,2.7
7,2.7
14,2.7
16,2.7
11,2.7
8,2.7
16,2.7
14,2.7
7,2.7
8,2.7
6,2.7
6,2.7
9,2.7
6,2.7
15,2.7
14,2.8
9,2.8
11,2.8
8,2.8
7,2.8
9,2.8
10,2.8
10,2.8
7,2.8
9,2.8
12,2.8
12,2.8
13,2.8
11,2.8
8,2.8
10,2.8
5,2.8
12,2.8
15,2.8
6,2.8
7,2.8
15,2.8
14,2.8
15,2.8
10,2.9
14,2.9
15,2.9
6,2.9
5,2.9
16,2.9
5,2.9
8,2.9
14,2.9
12,2.9
9,2.9
13,2.9
10,2.9
15,2.9
8,2.9
13,2.9
13,2.9
7,2.9
12,2.9
33,4
37,4
33,4
30,4
40,4
36,4
31,4
31,4
39,4
38,4
30,4
33,4
32,4
33,4
33,4
31,4
38,4
32,4.1
29,4.1
40,4.1
30,4.1
37,4.1
35,4.1
35,4.1
36,4.1
35,4.1
33,4.1
28,4.1
36,4.1
35,4.1
30,4.1
32,4.1
33,4.1
29,4.1
40,4.1
39,4.1
37,4.1
34,4.1
31,4.1
31,4.1
32,4.1
29,4.2
32,4.2
32,4.2
31,4.2
33,4.2
33,4.2
33,4.2
36,4.2
36,4.2
29,4.2
32,4.2
30,4.2
31,4.2
31,4.2
28,4.2
32,4.2
32,4.2
35,4.2
28,4.2
39,4.2
36,4.2
39,4.2
32,4.3
37,4.3
35,4.3
39,4.3
38,4.3
38,4.3
30,4.3
33,4.3
38,4.3
40,4.3
30,4.3
34,4.3
35,4.3
40,4.3
40,4.3
36,4.3
38,4.3
40,4.3
31,4.3
36,4.3
28,4.3
34,4.3
35,4.3
32,4.4
37,4.4
35,4.4
38,4.4
31,4.4
30,4.4
37,4.4
28,4.4
29,4.4
38,4.5
37,4.5
37,4.5
31,4.5
31,4.5
40,4.5
40,4.5
38,4.5
29,4.5
37,4.5
32,4.5
32,4.5
40,4.5
36,4.5
33,4.5
38,4.5
37,4.5
40,4.5
33,4.5
28,4.5
32,4.5
37,4.5
29,4.5
37,4.6
39,4.6
29,4.6
37,4.6
39,4.6
31,4.6
31,4.6
30,4.6
36,4.6
35,4.6
40,4.6
34,4.6
31,4.6
28,4.6
34,4.6
29,4.6
28,4.6
33,4.6
37,4.6
37,4.6
30,4.6
31,4.6
32,4.7
34,4.7
34,4.7
32,4.7
30,4.7
33,4.7
35,4.7
32,4.7
31,4.7
35,4.7
40,4.7
40,4.7
32,4.7
30,4.7
29,4.7
33,4.7
31,4.8
31,4.8
37,4.8
40,4.8
34,4.8
40,4.8
31,4.8
35,4.8
38,4.8
40,4.8
29,4.8
29,4.8
31,4.8
33,4.8
36,4.8
28,4.8
30,4.8
36,4.8
28,4.8
36,4.8
34,4.8
39,4.9
37,4.9
31,4.9
33,4.9
29,4.9
30,4.9
32,4.9
29,4.9
32,4.9
39,4.9
33,4.9
35,4.9
32,4.9
34,4.9
40,4.9
31,4.9
39,4.9
37,4.9
35,4.9
35,4.9
35,4.9
40,4.9
28,4.9
39,4.9
1,0
10,0
5,0
6,0
5,0
17,0
16,0
2,0
12,0
26,1
-1,1
19,1
14,1
-12,1
13,1
26,1
16,1
19,1
-5,2
-1,2
38,2
22,2
18,2
12,2
7,2
17,2
-19,2
18,2
-4,3
-9,3
13,3
33,3
5,3
40,3
11,3
14,3
31,3
26,3
22,4
33,4
39,4
25,4
3,4
4,4
13,4
1,4
22,4
-16,4