Initial commit

parents
#1. SET DIRECTORY AND FILE PATH
# Load the stock sample. Columns are always accessed through the data frame
# (or through ggplot's data argument), so no attach() is needed.
sample_stocks <- read.csv("S:/Work/Business and Client Work/Platforms/michaeljgrogan.com/Content/Courses/O Reilly/Video 3/14 Video 3 Code and Output/sample_stocks.csv")

# k-means starts from randomly chosen centres; seeding the RNG makes the elbow
# plot and the cluster labels identical from run to run.
set.seed(123)

#2. USE WITHIN GROUPS SUM OF SQUARES (WSS) TO DETERMINE NUMBER OF CLUSTERS
max_clusters <- 20
wss <- numeric(max_clusters)
# k = 1: total sum of squares, i.e. (n - 1) times the summed column variances.
wss[1] <- (nrow(sample_stocks) - 1) *
  sum(vapply(sample_stocks, var, numeric(1)))
# k = 2..max_clusters: total within-cluster sum of squares of the best of
# several random starts.
for (i in 2:max_clusters) {
  wss[i] <- kmeans(sample_stocks, centers = i, nstart = 25)$tot.withinss
}
plot(seq_len(max_clusters), wss, type = "b",
     xlab = "Number of Clusters",
     ylab = "Within groups sum of squares")
title("Number of Clusters")

#3. K-MEANS CLUSTER ANALYSIS
# NOTE(review): the columns are clustered on their raw scale, so the wider
# ranging column dominates the distance - confirm that this is intended.
fit <- kmeans(sample_stocks, centers = 3, nstart = 25) # 3 cluster solution
# Per-cluster mean of every column.
aggregate(sample_stocks, by = list(fit$cluster), FUN = mean)
# Append the cluster label as a factor column named fit.cluster.
sample_stocks <- data.frame(sample_stocks, fit$cluster)
sample_stocks
sample_stocks$fit.cluster <- as.factor(sample_stocks$fit.cluster)
library(ggplot2)
# Map colour to the column name; referencing sample_stocks$... inside aes()
# bypasses ggplot's data masking and produces a misleading legend title.
ggplot(sample_stocks,
       aes(x = dividendyield, y = returns, color = fit.cluster)) +
  geom_point() +
  ggtitle("K-Means Clustering")
\ No newline at end of file
#1. SET DIRECTORY AND FILE PATH
# Read cars.csv through its full path so the session's working directory is
# left untouched (no setwd()).
data_dir <- "S:/Work/Business and Client Work/Platforms/michaeljgrogan.com/Content/Courses/O Reilly/Video 3/14 Video 3 Code and Output"
fullData <- read.csv(file.path(data_dir, "cars.csv"))

#2. DEFINE RESPONSE VARIABLE
# Bin CarSales into three classes: <= 1000, (1000, 24000] and > 24000.
# The column is read from fullData explicitly instead of relying on attach().
fullData$response <- droplevels(cut(
  fullData$CarSales,
  breaks = c(-Inf, 1000, 24000, Inf),
  labels = c("<1000", ">1000 & <24000", ">24000")
))
str(fullData)

#3. TRAINING AND TEST DATA
inputData <- fullData[1:770, ] # training data
testData <- fullData[771:963, ] # test data
#4. CLASSIFICATION TREE
library(rpart)
# rpart cross-validates on randomly drawn folds; seeding makes the xerror
# column of the CP table (used later for pruning) repeatable.
set.seed(123)
formula <- response ~ Age + Gender + Miles + Debt + Income
# Deliberately grow a large tree (small cp) so it can be pruned afterwards.
dtree <- rpart(
  formula,
  data = inputData,
  method = "class",
  control = rpart.control(minsplit = 30, cp = 0.001)
)
plot(dtree)
text(dtree)
summary(dtree)
# Complexity-parameter table, then its plot.
printcp(dtree)
plotcp(dtree)
#5. PRUNED CLASSIFICATION TREE
# Prune at the complexity parameter with the lowest cross-validated error.
best_cp <- dtree$cptable[which.min(dtree$cptable[, "xerror"]), "CP"]
pdtree <- prune(dtree, cp = best_cp)
plot(pdtree, uniform = TRUE,
     main = "Pruned Classification Tree For Sales")
text(pdtree, use.n = TRUE, all = TRUE, cex = .8)

# Score the held-out rows. Without newdata, predict() returns fitted values
# for the training rows, which cannot be compared with testData$response.
out <- predict(pdtree, newdata = testData, type = "prob")
# Predicted class = column with the highest probability (first one on ties).
response_predicted <- colnames(out)[max.col(out, ties.method = "first")] # predicted
response_input <- as.character(testData$response) # actuals
# Confusion matrix of predicted vs. actual class on the test set.
table(predicted = response_predicted, actual = response_input)
mean(response_input != response_predicted) # misclassification %
#6. REGRESSION TREE
# Seed the RNG so rpart's cross-validated errors are repeatable.
set.seed(123)
fitreg <- rpart(CarSales ~ Age + Gender + Miles + Debt + Income,
                method = "anova", data = inputData)
printcp(fitreg)
plotcp(fitreg)
summary(fitreg)
# rsq.rpart() draws two plots; show them side by side, then restore the
# previous layout so later plots are not confined to half of the device.
old_par <- par(mfrow = c(1, 2))
rsq.rpart(fitreg) # cross-validation results
par(old_par)

#7. PRUNE REGRESSION TREE
# Prune at the complexity parameter with the lowest cross-validated error.
best_cp_reg <- fitreg$cptable[which.min(fitreg$cptable[, "xerror"]), "CP"] # from cptable
pfitreg <- prune(fitreg, cp = best_cp_reg)
plot(pfitreg, uniform = TRUE,
     main = "Pruned Regression Tree for Sales")
text(pfitreg, use.n = TRUE, all = TRUE, cex = .8)
#8. RANDOM FOREST
library(randomForest)
# Random forests are fitted with random resampling; seed the RNG so the
# printed error and the importance scores can be reproduced.
set.seed(123)
fitregforest <- randomForest(
  CarSales ~ Age + Gender + Miles + Debt + Income,
  data = inputData
)
print(fitregforest) # view results
importance(fitregforest) # importance of each predictor
plot(fitregforest)
\ No newline at end of file
#1. SET DIRECTORY AND FILE PATH
# Read cars.csv through its full path (no setwd()) and access columns through
# the data frame (no attach()).
data_dir <- "S:/Work/Business and Client Work/Platforms/michaeljgrogan.com/Content/Courses/O Reilly/Video 3/14 Video 3 Code and Output"
fullData <- read.csv(file.path(data_dir, "cars.csv"))
# Encode Gender as numeric codes. Going through a factor also works when
# read.csv() delivers a character column, where a bare as.numeric() would
# produce NA for every row.
if (!is.numeric(fullData$Gender)) {
  fullData$Gender <- as.numeric(as.factor(fullData$Gender))
}
# Keep only the response and the five predictors used by the network.
fullData <- fullData[, c("CarSales", "Age", "Gender", "Miles", "Debt", "Income")]

#2. MAX-MIN NORMALIZATION
# Rescale a numeric vector linearly onto [0, 1].
# A constant vector has zero range; it is mapped to all zeros instead of NaN.
normalize <- function(x) {
  lo <- min(x)
  span <- max(x) - lo
  if (!is.na(span) && span == 0) {
    return(rep(0, length(x)))
  }
  (x - lo) / span
}
maxmindf <- as.data.frame(lapply(fullData, normalize))

#3.TRAINING AND TEST DATA
trainset <- maxmindf[1:770, ]
testset <- maxmindf[771:963, ]

#4. NEURAL NETWORK
library(neuralnet)
# The starting weights are drawn at random; seed the RNG so that training is
# repeatable.
set.seed(123)
nn <- neuralnet(
  CarSales ~ Age + Gender + Miles + Debt + Income,
  data = trainset,
  hidden = c(5, 2),
  linear.output = TRUE,
  threshold = 0.01
)
nn$result.matrix
plot(nn)
# Predictor columns of the test rows only (the response is left out).
temp_test <- subset(testset, select = c("Age", "Gender", "Miles", "Debt", "Income"))
head(temp_test)
nn.results <- compute(nn, temp_test)

#5. MODEL VALIDATION
results <- data.frame(actual = testset$CarSales, prediction = nn.results$net.result)
results
# Undo the max-min scaling using the range of the un-normalized CarSales.
sales_min <- min(fullData$CarSales)
sales_span <- diff(range(fullData$CarSales))
predicted <- results$prediction * sales_span + sales_min
actual <- results$actual * sales_span + sales_min
# Relative error per test row.
# NOTE(review): this is undefined for rows whose actual CarSales is 0 -
# confirm the data contains none.
deviation <- (actual - predicted) / actual
comparison <- data.frame(predicted, actual, deviation)
# Average the absolute relative errors; averaging the signed errors first
# would let over- and under-predictions cancel and overstate accuracy.
accuracy <- 1 - mean(abs(deviation))
accuracy
\ No newline at end of file
returns,dividendyield
-19,0
-13,0
-14,0
-9,0
-19,0
-10,0
-20,0
-11,0
-12,0
-10,0
-13,0
-14,0
-14,0
-12,0
-19,0
-10,0
-11,0
-11,0
-13,0
-11,0
-9,0
-16,0
-9,0
-18,0
-19,0
-19,0.1
-13,0.1
-12,0.1
-8,0.1
-17,0.1
-13,0.1
-9,0.1
-17,0.1
-8,0.1
-20,0.1
-17,0.1
-19,0.1
-11,0.1
-8,0.2
-13,0.2
-17,0.2
-8,0.2
-8,0.2
-20,0.2
-13,0.2
-18,0.2
-16,0.2
-11,0.2
-11,0.2
-10,0.2
-8,0.2
-14,0.2
-20,0.2
-11,0.2
-10,0.2
-20,0.2
-10,0.3
-9,0.3
-10,0.3
-13,0.3
-17,0.3
-10,0.3
-15,0.3
-10,0.3
-13,0.3
-8,0.3
-11,0.3
-9,0.3
-14,0.3
-17,0.3
-16,0.3
-18,0.3
-10,0.3
-12,0.3
-11,0.3
-11,0.4
-17,0.4
-10,0.4
-11,0.4
-8,0.4
-11,0.4
-11,0.4
-17,0.4
-15,0.4
-20,0.4
-16,0.4
-13,0.4
-13,0.4
-17,0.4
-11,0.4
-11,0.4
-19,0.4
-20,0.4
-13,0.4
-17,0.4
-10,0.4
-13,0.4
-13,0.4
-13,0.5
-10,0.5
-16,0.5
-19,0.5
-15,0.5
-13,0.5
-18,0.5
-20,0.5
-9,0.5
-10,0.5
-10,0.5
-19,0.5
-16,0.5
-15,0.5
-12,0.5
-9,0.5
-13,0.5
-20,0.5
-9,0.6
-19,0.6
-18,0.6
-18,0.6
-17,0.6
-16,0.6
-11,0.6
-16,0.6
-17,0.6
-10,0.6
-16,0.6
-19,0.6
-8,0.6
-14,0.6
-18,0.6
-16,0.6
-14,0.6
-16,0.6
-20,0.6
-15,0.7
-20,0.7
-12,0.7
-19,0.7
-13,0.7
-15,0.7
-14,0.7
-11,0.7
-14,0.7
-19,0.7
-14,0.7
-20,0.7
-14,0.7
-16,0.7
-19,0.7
-13,0.7
-19,0.7
-13,0.7
-8,0.7
-20,0.7
-20,0.7
-14,0.7
-15,0.7
-19,0.7
-19,0.7
-19,0.8
-9,0.8
-17,0.8
-15,0.8
-13,0.8
-15,0.8
-12,0.8
-20,0.8
-13,0.8
-13,0.8
-17,0.8
-18,0.8
-19,0.8
-12,0.8
-15,0.8
-10,0.8
-12,0.8
-19,0.8
-20,0.8
-10,0.9
-16,0.9
-11,0.9
-15,0.9
-19,0.9
-10,0.9
-11,0.9
-13,0.9
-12,0.9
-10,0.9
-12,0.9
-10,0.9
-15,0.9
-12,0.9
-15,0.9
-16,0.9
-13,0.9
-15,0.9
-15,0.9
-12,0.9
14,2
10,2
7,2
10,2
14,2
14,2
12,2
7,2
14,2
6,2
10,2
12,2
9,2
8,2
7,2.1
11,2.1
8,2.1
8,2.1
11,2.1
7,2.1
6,2.1
11,2.1
6,2.1
10,2.1
16,2.1
11,2.1
10,2.1
15,2.1
16,2.1
10,2.1
13,2.1
7,2.1
15,2.1
7,2.1
14,2.1
14,2.1
7,2.1
6,2.1
7,2.2
5,2.2
9,2.2
5,2.2
10,2.2
14,2.2
10,2.2
10,2.2
5,2.2
16,2.2
16,2.2
13,2.2
16,2.2
7,2.2
14,2.2
16,2.2
5,2.2
12,2.2
16,2.2
14,2.2
11,2.2
11,2.2
9,2.2
10,2.3
16,2.3
7,2.3
6,2.3
15,2.3
9,2.3
13,2.3
5,2.3
15,2.3
6,2.3
7,2.3
13,2.3
15,2.3
6,2.3
11,2.3
7,2.3
6,2.4
16,2.4
12,2.4
8,2.4
7,2.4
10,2.4
9,2.4
9,2.4
16,2.4
7,2.4
12,2.4
15,2.4
11,2.4
11,2.4
13,2.4
12,2.4
12,2.4
8,2.4
10,2.4
6,2.5
14,2.5
12,2.5
14,2.5
12,2.5
6,2.5
7,2.5
15,2.5
6,2.5
15,2.5
7,2.5
15,2.5
9,2.5
10,2.5
6,2.5
6,2.5
15,2.5
10,2.5
7,2.5
14,2.5
14,2.5
11,2.5
9,2.5
15,2.5
10,2.6
12,2.6
5,2.6
12,2.6
8,2.6
13,2.6
10,2.6
12,2.6
16,2.6
6,2.6
6,2.6
10,2.6
7,2.6
12,2.6
13,2.6
6,2.7
10,2.7
7,2.7
7,2.7
11,2.7
12,2.7
5,2.7
9,2.7
7,2.7
14,2.7
16,2.7
11,2.7
8,2.7
16,2.7
14,2.7
7,2.7
8,2.7
6,2.7
6,2.7
9,2.7
6,2.7
15,2.7
14,2.8
9,2.8
11,2.8
8,2.8
7,2.8
9,2.8
10,2.8
10,2.8
7,2.8
9,2.8
12,2.8
12,2.8
13,2.8
11,2.8
8,2.8
10,2.8
5,2.8
12,2.8
15,2.8
6,2.8
7,2.8
15,2.8
14,2.8
15,2.8
10,2.9
14,2.9
15,2.9
6,2.9
5,2.9
16,2.9
5,2.9
8,2.9
14,2.9
12,2.9
9,2.9
13,2.9
10,2.9
15,2.9
8,2.9
13,2.9
13,2.9
7,2.9
12,2.9
33,4
37,4
33,4
30,4
40,4
36,4
31,4
31,4
39,4
38,4
30,4
33,4
32,4
33,4
33,4
31,4
38,4
32,4.1
29,4.1
40,4.1
30,4.1
37,4.1
35,4.1
35,4.1
36,4.1
35,4.1
33,4.1
28,4.1
36,4.1
35,4.1
30,4.1
32,4.1
33,4.1
29,4.1
40,4.1
39,4.1
37,4.1
34,4.1
31,4.1
31,4.1
32,4.1
29,4.2
32,4.2
32,4.2
31,4.2
33,4.2
33,4.2
33,4.2
36,4.2
36,4.2
29,4.2
32,4.2
30,4.2
31,4.2
31,4.2
28,4.2
32,4.2
32,4.2
35,4.2
28,4.2
39,4.2
36,4.2
39,4.2
32,4.3
37,4.3
35,4.3
39,4.3
38,4.3
38,4.3
30,4.3
33,4.3
38,4.3
40,4.3
30,4.3
34,4.3
35,4.3
40,4.3
40,4.3
36,4.3
38,4.3
40,4.3
31,4.3
36,4.3
28,4.3
34,4.3
35,4.3
32,4.4
37,4.4
35,4.4
38,4.4
31,4.4
30,4.4
37,4.4
28,4.4
29,4.4
38,4.5
37,4.5
37,4.5
31,4.5
31,4.5
40,4.5
40,4.5
38,4.5
29,4.5
37,4.5
32,4.5
32,4.5
40,4.5
36,4.5
33,4.5
38,4.5
37,4.5
40,4.5
33,4.5
28,4.5
32,4.5
37,4.5
29,4.5
37,4.6
39,4.6
29,4.6
37,4.6
39,4.6
31,4.6
31,4.6
30,4.6
36,4.6
35,4.6
40,4.6
34,4.6
31,4.6
28,4.6
34,4.6
29,4.6
28,4.6
33,4.6
37,4.6
37,4.6
30,4.6
31,4.6
32,4.7
34,4.7
34,4.7
32,4.7
30,4.7
33,4.7
35,4.7
32,4.7
31,4.7
35,4.7
40,4.7
40,4.7
32,4.7
30,4.7
29,4.7
33,4.7
31,4.8
31,4.8
37,4.8
40,4.8
34,4.8
40,4.8
31,4.8
35,4.8
38,4.8
40,4.8
29,4.8
29,4.8
31,4.8
33,4.8
36,4.8
28,4.8
30,4.8
36,4.8
28,4.8
36,4.8
34,4.8
39,4.9
37,4.9
31,4.9
33,4.9
29,4.9
30,4.9
32,4.9
29,4.9
32,4.9
39,4.9
33,4.9
35,4.9
32,4.9
34,4.9
40,4.9
31,4.9
39,4.9
37,4.9
35,4.9
35,4.9
35,4.9
40,4.9
28,4.9
39,4.9
1,0
10,0
5,0
6,0
5,0
17,0
16,0
2,0
12,0
26,1
-1,1
19,1
14,1
-12,1
13,1
26,1
16,1
19,1
-5,2
-1,2
38,2
22,2
18,2
12,2
7,2
17,2
-19,2
18,2
-4,3
-9,3
13,3
33,3
5,3
40,3
11,3
14,3
31,3
26,3
22,4
33,4
39,4
25,4
3,4
4,4
13,4
1,4
22,4
-16,4
## Example files for the title:
#
[![](http://akamaicovers.oreilly.com/images//cat.gif)](https://www.safaribooksonline.com/library/view/title///)
The following applies to example files from material published by O’Reilly Media, Inc. Content from other publishers may include different rules of usage. Please refer to any additional usage rights explained in the actual example files or refer to the publisher’s website.
O'Reilly books are here to help you get your job done. In general, you may use the code in O'Reilly books in your programs and documentation. You do not need to contact us for permission unless you're reproducing a significant portion of the code. For example, writing a program that uses several chunks of code from our books does not require permission. Answering a question by citing our books and quoting example code does not require permission. On the other hand, selling or distributing a CD-ROM of examples from O'Reilly books does require permission. Incorporating a significant amount of example code from our books into your product's documentation does require permission.
We appreciate, but do not require, attribution. An attribution usually includes the title, author, publisher, and ISBN.
If you think your use of code examples falls outside fair use or the permission given here, feel free to contact us at <permissions@oreilly.com>.
Please note that the examples are not production code and have not been carefully tested. They are provided "as-is" and come with no warranty of any kind.
File mode changed
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment