Initial commit

parents
## Example files for the title:
# R Graphics Cookbook, by Winston Chang
[![R Graphics Cookbook, by Winston Chang](http://akamaicovers.oreilly.com/images/9781449316945/cat.gif)](https://www.safaribooksonline.com/library/view/title/9781449363086//)
The following applies to example files from material published by O’Reilly Media, Inc. Content from other publishers may include different rules of usage. Please refer to any additional usage rights explained in the actual example files or refer to the publisher’s website.
O'Reilly books are here to help you get your job done. In general, you may use the code in O'Reilly books in your programs and documentation. You do not need to contact us for permission unless you're reproducing a significant portion of the code. For example, writing a program that uses several chunks of code from our books does not require permission. Answering a question by citing our books and quoting example code does not require permission. On the other hand, selling or distributing a CD-ROM of examples from O'Reilly books does require permission. Incorporating a significant amount of example code from our books into your product's documentation does require permission.
We appreciate, but do not require, attribution. An attribution usually includes the title, author, publisher, and ISBN.
If you think your use of code examples falls outside fair use or the permission given here, feel free to contact us at <permissions@oreilly.com>.
Please note that the examples are not production code and have not been carefully tested. They are provided "as-is" and come with no warranty of any kind.
appendix: Introduction to ggplot2
==================
library(gcookbook) # For the data set
simpledat
A1 A2 A3
B1 10 7 12
B2 9 11 6
====================================
barplot(simpledat, beside=TRUE)
====================================
t(simpledat)
B1 B2
A1 10 9
A2 7 11
A3 12 6
====================================
barplot(t(simpledat), beside=TRUE)
====================================
plot(simpledat[1,], type="l")
lines(simpledat[2,], type="l", col="blue")
====================================
simpledat_long
Aval Bval value
A1 B1 10
A1 B2 9
A2 B1 7
A2 B2 11
A3 B1 12
A3 B2 6
====================================
library(ggplot2)
ggplot(simpledat_long, aes(x=Aval, y=value, fill=Bval)) +
geom_bar(stat="identity", position="dodge")
====================================
ggplot(simpledat_long, aes(x=Bval, y=value, fill=Aval)) +
geom_bar(stat="identity", position="dodge")
====================================
ggplot(simpledat_long, aes(x=Aval, y=value, colour=Bval, group=Bval)) +
geom_line()
====================================
dat <- data.frame(xval=1:4, yval=c(3,5,6,9), group=c("A","B","A","B"))
dat
xval yval group
1 3 A
2 5 B
3 6 A
4 9 B
====================================
ggplot(dat, aes(x=xval, y=yval))
====================================
ggplot(dat, aes(x=xval, y=yval)) + geom_point()
====================================
p <- ggplot(dat, aes(x=xval, y=yval))
p + geom_point()
====================================
p + geom_point(aes(colour=group))
====================================
p + geom_point(colour="blue")
====================================
p + geom_point() + scale_x_continuous(limits=c(0,8))
====================================
p + geom_point() +
scale_colour_manual(values=c("orange","forestgreen"))
==================
\ No newline at end of file
preface: Preface
==================
install.packages("ggplot2")
install.packages("gcookbook")
====================================
library(ggplot2)
==================
\ No newline at end of file
chapter: R Basics
==================
install.packages(c("ggplot2", "gcookbook"))
====================================
library(ggplot2)
library(gcookbook)
====================================
install.packages("ggplot2")
====================================
library(ggplot2)
====================================
data <- read.csv("datafile.csv")
====================================
data <- read.csv("datafile.csv", header=FALSE)
====================================
# Manually assign the header names
names(data) <- c("Column1","Column2","Column3")
====================================
data <- read.csv("datafile.csv", sep="\t")
====================================
data <- read.csv("datafile.csv", stringsAsFactors=FALSE)
# Convert to factor
data$Sex <- factor(data$Sex)
str(data)
'data.frame': 3 obs. of 4 variables:
$ First : chr "Currer" "Dr." ""
$ Last : chr "Bell" "Seuss" "Student"
$ Sex : Factor w/ 2 levels "F","M": 1 2 NA
$ Number: int 2 49 21
====================================
# Only need to install once
install.packages("xlsx")
# Load the package installed above; the name must match it exactly ("xlsx")
library(xlsx)
# Read the first sheet of the workbook
data <- read.xlsx("datafile.xlsx", 1)
====================================
# Only need to install once
install.packages("gdata")
library(gdata)
# Read first sheet
data <- read.xls("datafile.xls")
====================================
data <- read.xlsx("datafile.xls", sheetIndex=2)
data <- read.xlsx("datafile.xls", sheetName="Revenues")
====================================
data <- read.xls("datafile.xls", sheet=2)
====================================
# Only need to install the first time
install.packages("foreign")
library(foreign)
data <- read.spss("datafile.sav")
==================
\ No newline at end of file
chapter: Quickly Exploring Data
==================
plot(mtcars$wt, mtcars$mpg)
====================================
library(ggplot2)
qplot(mtcars$wt, mtcars$mpg)
====================================
qplot(wt, mpg, data=mtcars)
# This is equivalent to:
ggplot(mtcars, aes(x=wt, y=mpg)) + geom_point()
====================================
plot(pressure$temperature, pressure$pressure, type="l")
====================================
plot(pressure$temperature, pressure$pressure, type="l")
points(pressure$temperature, pressure$pressure)
lines(pressure$temperature, pressure$pressure/2, col="red")
points(pressure$temperature, pressure$pressure/2, col="red")
====================================
library(ggplot2)
qplot(pressure$temperature, pressure$pressure, geom="line")
====================================
qplot(temperature, pressure, data=pressure, geom="line")
# This is equivalent to:
ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line()
# Lines and points together
qplot(temperature, pressure, data=pressure, geom=c("line", "point"))
# Equivalent to:
ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line() + geom_point()
====================================
barplot(BOD$demand, names.arg=BOD$Time)
====================================
table(mtcars$cyl)
4 6 8
11 7 14
# There are 11 cases of the value 4, 7 cases of 6, and 14 cases of 8
====================================
# Generate a table of counts
barplot(table(mtcars$cyl))
====================================
library(ggplot2)
qplot(BOD$Time, BOD$demand, geom="bar", stat="identity")
# Convert the x variable to a factor, so that it is treated as discrete
qplot(factor(BOD$Time), BOD$demand, geom="bar", stat="identity")
====================================
# cyl is continuous here
qplot(mtcars$cyl)
# Treat cyl as discrete
qplot(factor(mtcars$cyl))
====================================
# Bar graph of values. This uses the BOD data frame, with the
# "Time" column for x values and the "demand" column for y values.
qplot(Time, demand, data=BOD, geom="bar", stat="identity")
# This is equivalent to:
ggplot(BOD, aes(x=Time, y=demand)) + geom_bar(stat="identity")
# Bar graph of counts
qplot(factor(cyl), data=mtcars)
# This is equivalent to:
ggplot(mtcars, aes(x=factor(cyl))) + geom_bar()
====================================
hist(mtcars$mpg)
# Specify approximate number of bins with breaks
hist(mtcars$mpg, breaks=10)
====================================
qplot(mtcars$mpg)
====================================
library(ggplot2)
qplot(mpg, data=mtcars, binwidth=4)
# This is equivalent to:
ggplot(mtcars, aes(x=mpg)) + geom_histogram(binwidth=4)
====================================
plot(ToothGrowth$supp, ToothGrowth$len)
====================================
# Formula syntax
boxplot(len ~ supp, data = ToothGrowth)
# Put interaction of two variables on x-axis
boxplot(len ~ supp + dose, data = ToothGrowth)
====================================
library(ggplot2)
qplot(ToothGrowth$supp, ToothGrowth$len, geom="boxplot")
====================================
qplot(supp, len, data=ToothGrowth, geom="boxplot")
# This is equivalent to:
ggplot(ToothGrowth, aes(x=supp, y=len)) + geom_boxplot()
====================================
# Using three separate vectors
qplot(interaction(ToothGrowth$supp, ToothGrowth$dose), ToothGrowth$len,
geom="boxplot")
# Alternatively, get the columns from the data frame
qplot(interaction(supp, dose), len, data=ToothGrowth, geom="boxplot")
# This is equivalent to:
ggplot(ToothGrowth, aes(x=interaction(supp, dose), y=len)) + geom_boxplot()
====================================
curve(x^3 - 5*x, from=-4, to=4)
====================================
# Plot a user-defined function
myfun <- function(xvar) {
    # Logistic curve shifted so that its midpoint (value 0.5) is at xvar = 10.
    # Vectorized: returns one value per element of xvar.
    shifted <- 10 - xvar
    1 / (1 + exp(shifted))
}
curve(myfun(x), from=0, to=20)
# Add a line:
curve(1-myfun(x), add = TRUE, col = "red")
====================================
library(ggplot2)
# This sets the x range from 0 to 20
qplot(c(0,20), fun=myfun, stat="function", geom="line")
# This is equivalent to:
ggplot(data.frame(x=c(0, 20)), aes(x=x)) + stat_function(fun=myfun, geom="line")
==================
\ No newline at end of file
chapter: Line Graphs
==================
ggplot(BOD, aes(x=Time, y=demand)) + geom_line()
====================================
BOD
Time demand
1 8.3
2 10.3
3 19.0
4 16.0
5 15.6
7 19.8
====================================
BOD1 <- BOD # Make a copy of the data
BOD1$Time <- factor(BOD1$Time)
ggplot(BOD1, aes(x=Time, y=demand, group=1)) + geom_line()
====================================
# These have the same result
ggplot(BOD, aes(x=Time, y=demand)) + geom_line() + ylim(0, max(BOD$demand))
ggplot(BOD, aes(x=Time, y=demand)) + geom_line() + expand_limits(y=0)
====================================
ggplot(BOD, aes(x=Time, y=demand)) + geom_line() + geom_point()
====================================
library(gcookbook) # For the data set
ggplot(worldpop, aes(x=Year, y=Population)) + geom_line() + geom_point()
# Same with a log y-axis
ggplot(worldpop, aes(x=Year, y=Population)) + geom_line() + geom_point() +
scale_y_log10()
====================================
# Load plyr so we can use ddply() to create the example data set
library(plyr)
# Summarize the ToothGrowth data
tg <- ddply(ToothGrowth, c("supp", "dose"), summarise, length=mean(len))
# Map supp to colour
ggplot(tg, aes(x=dose, y=length, colour=supp)) + geom_line()
# Map supp to linetype
ggplot(tg, aes(x=dose, y=length, linetype=supp)) + geom_line()
====================================
tg
supp dose length
OJ 0.5 13.23
OJ 1.0 22.70
OJ 2.0 26.06
VC 0.5 7.98
VC 1.0 16.77
VC 2.0 26.14
str(tg)
'data.frame': 6 obs. of 3 variables:
$ supp : Factor w/ 2 levels "OJ","VC": 1 1 1 2 2 2
$ dose : num 0.5 1 2 0.5 1 2
$ length: num 13.23 22.7 26.06 7.98 16.77 ...
====================================
ggplot(tg, aes(x=factor(dose), y=length, colour=supp, group=supp)) + geom_line()
====================================
ggplot(tg, aes(x=factor(dose), y=length, colour=supp)) + geom_line()
geom_path: Each group consists of only one observation. Do you need to adjust the
group aesthetic?
====================================
ggplot(tg, aes(x=dose, y=length)) + geom_line()
====================================
ggplot(tg, aes(x=dose, y=length, shape=supp)) + geom_line() +
geom_point(size=4) # Make the points a little larger
ggplot(tg, aes(x=dose, y=length, fill=supp)) + geom_line() +
geom_point(size=4, shape=21) # Also use a point with a color fill
====================================
ggplot(tg, aes(x=dose, y=length, shape=supp)) +
geom_line(position=position_dodge(0.2)) + # Dodge lines by 0.2
geom_point(position=position_dodge(0.2), size=4) # Dodge points by 0.2
====================================
ggplot(BOD, aes(x=Time, y=demand)) +
geom_line(linetype="dashed", size=1, colour="blue")
====================================
# Load plyr so we can use ddply() to create the example data set
library(plyr)
# Summarize the ToothGrowth data
tg <- ddply(ToothGrowth, c("supp", "dose"), summarise, length=mean(len))
ggplot(tg, aes(x=dose, y=length, colour=supp)) +
geom_line() +
scale_colour_brewer(palette="Set1")
====================================
# If both lines have the same properties, you need to specify a variable to
# use for grouping
ggplot(tg, aes(x=dose, y=length, group=supp)) +
geom_line(colour="darkgreen", size=1.5)
# Since supp is mapped to colour, it will automatically be used for grouping
ggplot(tg, aes(x=dose, y=length, colour=supp)) +
geom_line(linetype="dashed") +
geom_point(shape=22, size=3, fill="white")
====================================
ggplot(BOD, aes(x=Time, y=demand)) +
geom_line() +
geom_point(size=4, shape=22, colour="darkred", fill="pink")
====================================
ggplot(BOD, aes(x=Time, y=demand)) +
geom_line() +
geom_point(size=4, shape=21, fill="white")
====================================
# Load plyr so we can use ddply() to create the example data set
library(plyr)
# Summarize the ToothGrowth data
tg <- ddply(ToothGrowth, c("supp", "dose"), summarise, length=mean(len))
# Save the position_dodge specification because we'll use it multiple times
pd <- position_dodge(0.2)
ggplot(tg, aes(x=dose, y=length, fill=supp)) +
geom_line(position=pd) +
geom_point(shape=21, size=3, position=pd) +
scale_fill_manual(values=c("black","white"))
====================================
# Convert the sunspot.year data set into a data frame for this example
sunspotyear <- data.frame(
Year = as.numeric(time(sunspot.year)),
Sunspots = as.numeric(sunspot.year)
)
ggplot(sunspotyear, aes(x=Year, y=Sunspots)) + geom_area()
====================================
ggplot(sunspotyear, aes(x=Year, y=Sunspots)) +
geom_area(colour="black", fill="blue", alpha=.2)
====================================
ggplot(sunspotyear, aes(x=Year, y=Sunspots)) +
geom_area(fill="blue", alpha=.2) +
geom_line()
====================================
library(gcookbook) # For the data set
ggplot(uspopage, aes(x=Year, y=Thousands, fill=AgeGroup)) + geom_area()
====================================
uspopage
Year AgeGroup Thousands
1900 <5 9181
1900 5-14 16966
1900 15-24 14951
1900 25-34 12161
1900 35-44 9273
1900 45-54 6437
1900 55-64 4026
1900 >64 3099
1901 <5 9336
1901 5-14 17158
...
====================================
ggplot(uspopage, aes(x=Year, y=Thousands, fill=AgeGroup)) +
geom_area(colour="black", size=.2, alpha=.4) +
scale_fill_brewer(palette="Blues", breaks=rev(levels(uspopage$AgeGroup)))
====================================
library(plyr) # For the desc() function
ggplot(uspopage, aes(x=Year, y=Thousands, fill=AgeGroup, order=desc(AgeGroup))) +
geom_area(colour="black", size=.2, alpha=.4) +
scale_fill_brewer(palette="Blues")
====================================
ggplot(uspopage, aes(x=Year, y=Thousands, fill=AgeGroup, order=desc(AgeGroup))) +
geom_area(colour=NA, alpha=.4) +
scale_fill_brewer(palette="Blues") +
geom_line(position="stack", size=.2)
====================================
library(gcookbook) # For the data set
library(plyr) # For the ddply() function
# Convert Thousands to Percent
uspopage_prop <- ddply(uspopage, "Year", transform,
Percent = Thousands / sum(Thousands) * 100)
====================================
ggplot(uspopage_prop, aes(x=Year, y=Percent, fill=AgeGroup)) +
geom_area(colour="black", size=.2, alpha=.4) +
scale_fill_brewer(palette="Blues", breaks=rev(levels(uspopage$AgeGroup)))
====================================
uspopage
Year AgeGroup Thousands
1900 <5 9181
1900 5-14 16966
1900 15-24 14951
1900 25-34 12161
1900 35-44 9273
1900 45-54 6437
1900 55-64 4026
1900 >64 3099
1901 <5 9336
1901 5-14 17158
...
====================================
uspopage_prop <- ddply(uspopage, "Year", transform,
Percent = Thousands / sum(Thousands) * 100)
Year AgeGroup Thousands Percent
1900 <5 9181 12.065340
1900 5-14 16966 22.296107
1900 15-24 14951 19.648067
1900 25-34 12161 15.981549
1900 35-44 9273 12.186243
1900 45-54 6437 8.459274
1900 55-64 4026 5.290825
1900 >64 3099 4.072594
1901 <5 9336 12.033409
1901 5-14 17158 22.115385
...
====================================
library(gcookbook) # For the data set
# Grab a subset of the climate data
clim <- subset(climate, Source == "Berkeley",
select=c("Year", "Anomaly10y", "Unc10y"))
clim
Year Anomaly10y Unc10y
1800 -0.435 0.505
1801 -0.453 0.493
1802 -0.460 0.486
...
2003 0.869 0.028
2004 0.884 0.029
====================================
# Shaded region
ggplot(clim, aes(x=Year, y=Anomaly10y)) +
geom_ribbon(aes(ymin=Anomaly10y-Unc10y, ymax=Anomaly10y+Unc10y),
alpha=0.2) +
geom_line()
====================================
# With a dotted line for upper and lower bounds
ggplot(clim, aes(x=Year, y=Anomaly10y)) +
geom_line(aes(y=Anomaly10y-Unc10y), colour="grey50", linetype="dotted") +
geom_line(aes(y=Anomaly10y+Unc10y), colour="grey50", linetype="dotted") +
geom_line()
==================
\ No newline at end of file
chapter: Annotations
==================
p <- ggplot(faithful, aes(x=eruptions, y=waiting)) + geom_point()
p + annotate("text", x=3, y=48, label="Group 1") +
annotate("text", x=4.5, y=66, label="Group 2")
====================================
p + annotate("text", x=3, y=48, label="Group 1", family="serif",
fontface="italic", colour="darkred", size=3) +
annotate("text", x=4.5, y=66, label="Group 2", family="serif",
fontface="italic", colour="darkred", size=3)
====================================
p + annotate("text", x=3, y=48, label="Group 1", alpha=.1) + # Normal
geom_text(x=4.5, y=66, label="Group 2", alpha=.1) # Overplotted
====================================
p + annotate("text", x=-Inf, y=Inf, label="Upper left", hjust=-.2, vjust=2) +
annotate("text", x=mean(range(faithful$eruptions)), y=-Inf, vjust=-0.4,
label="Bottom middle")
====================================
# A normal curve
p <- ggplot(data.frame(x=c(-3,3)), aes(x=x)) + stat_function(fun = dnorm)
p + annotate("text", x=2, y=0.3, parse=TRUE,
label="frac(1, sqrt(2 * pi)) * e ^ {-x^2 / 2}")
====================================
p + annotate("text", x=0, y=0.05, parse=TRUE, size=4,
label="'Function: ' * y==frac(1, sqrt(2*pi)) * e^{-x^2/2}")
====================================
library(gcookbook) # For the data set
p <- ggplot(heightweight, aes(x=ageYear, y=heightIn, colour=sex)) + geom_point()
# Add horizontal and vertical lines
p + geom_hline(yintercept=60) + geom_vline(xintercept=14)
# Add angled line
p + geom_abline(intercept=37.4, slope=1.75)
====================================
library(plyr) # For the ddply() function
hw_means <- ddply(heightweight, "sex", summarise, heightIn=mean(heightIn))
hw_means
sex heightIn
f 60.52613
m 62.06000
p + geom_hline(aes(yintercept=heightIn, colour=sex), data=hw_means,
linetype="dashed", size=1)
====================================
pg <- ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_point()
pg + geom_vline(xintercept = 2)
pg + geom_vline(xintercept = which(levels(PlantGrowth$group)=="ctrl"))
====================================
library(gcookbook) # For the data set
p <- ggplot(subset(climate, Source=="Berkeley"), aes(x=Year, y=Anomaly10y)) +
geom_line()
p + annotate("segment", x=1950, xend=1980, y=-.25, yend=-.25)
====================================
library(grid)
p + annotate("segment", x=1850, xend=1820, y=-.8, yend=-.95, colour="blue",
size=2, arrow=arrow()) +
annotate("segment", x=1950, xend=1980, y=-.25, yend=-.25,
arrow=arrow(ends="both", angle=90, length=unit(.2,"cm")))
====================================
library(gcookbook) # For the data set
p <- ggplot(subset(climate, Source=="Berkeley"), aes(x=Year, y=Anomaly10y)) +
geom_line()
p + annotate("rect", xmin=1950, xmax=1980, ymin=-1, ymax=1, alpha=.1,
fill="blue")
====================================
pg <- PlantGrowth # Make a copy of the PlantGrowth data
pg$hl <- "no" # Set all to "no"
pg$hl[pg$group=="trt2"] <- "yes" # If group is "trt2", set to "yes"
====================================
# Fill each box according to the hl flag ("no"/"yes") and hide the legend.
# guide="none" is used instead of guide=FALSE, which newer ggplot2 deprecates.
ggplot(pg, aes(x=group, y=weight, fill=hl)) + geom_boxplot() +
    scale_fill_manual(values=c("grey85", "#FFDDCC"), guide="none")
====================================
# Same highlight without a helper column: map fill to group and give one
# colour per level, with only the third level in the highlight colour.
# guide="none" is used instead of guide=FALSE, which newer ggplot2 deprecates.
ggplot(PlantGrowth, aes(x=group, y=weight, fill=group)) + geom_boxplot() +
    scale_fill_manual(values=c("grey85", "grey85", "#FFDDCC"), guide="none")
====================================
library(gcookbook) # For the data set
# Take a subset of the cabbage_exp data for this example
ce <- subset(cabbage_exp, Cultivar == "c39")
# With a bar graph
# Weight is mapped to y, so the bars must use stat="identity" (bar height is
# the value itself, not a count of rows), as in the other bar graphs of values.
ggplot(ce, aes(x=Date, y=Weight)) +
    geom_bar(stat="identity", fill="white", colour="black") +
    geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se), width=.2)
# With a line graph
# group=1 joins the points across the discrete Date values with a single line
ggplot(ce, aes(x=Date, y=Weight)) +
    geom_line(aes(group=1)) +
    geom_point(size=4) +
    geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se), width=.2)
====================================
ce
Cultivar Date Weight sd n se
c39 d16 3.18 0.9566144 10 0.30250803
c39 d20 2.80 0.2788867 10 0.08819171
c39 d21 2.74 0.9834181 10 0.31098410
====================================
cabbage_exp
Cultivar Date Weight sd n se
c39 d16 3.18 0.9566144 10 0.30250803
c39 d20 2.80 0.2788867 10 0.08819171
c39 d21 2.74 0.9834181 10 0.31098410
c52 d16 2.26 0.4452215 10 0.14079141
c52 d20 3.11 0.7908505 10 0.25008887
c52 d21 1.47 0.2110819 10 0.06674995
====================================
# Bad: dodge width not specified
# (Weight is mapped to y, so the bars use stat="identity" in both examples.)
ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar)) +
    geom_bar(stat="identity", position="dodge") +
    geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se),
                  position="dodge", width=.2)
# Good: dodge width set to same as bar width (0.9)
ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar)) +
    geom_bar(stat="identity", position="dodge") +
    geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se),
                  position=position_dodge(0.9), width=.2)
====================================
pd <- position_dodge(.3) # Save the dodge spec because we use it repeatedly
ggplot(cabbage_exp, aes(x=Date, y=Weight, colour=Cultivar, group=Cultivar)) +
geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se),
width=.2, size=0.25, colour="black", position=pd) +
geom_line(position=pd) +
geom_point(position=pd, size=2.5)
# Thinner error bar lines with size=0.25, and larger points with size=2.5
====================================
# The base plot
p <- ggplot(mpg, aes(x=displ, y=hwy)) + geom_point() + facet_grid(. ~ drv)
# A data frame with labels for each facet
f_labels <- data.frame(drv = c("4", "f", "r"), label = c("4wd", "Front", "Rear"))
p + geom_text(x=6, y=40, aes(label=label), data=f_labels)
# If you use annotate(), the label will appear in all facets
p + annotate("text", x=6, y=42, label="label text")
====================================
# Build label strings describing a linear fit of hwy on displ.
# Takes a data frame with displ and hwy columns and returns a one-row data
# frame with two character columns: formula (the fitted regression equation)
# and r2 (the squared correlation between displ and hwy).
# Both strings are written as R math expressions
lm_labels <- function(dat) {
    fit <- lm(hwy ~ displ, data=dat)
    # Intercept first, slope second, each rounded to two decimals
    coefs <- round(coef(fit), 2)
    eqn <- sprintf("italic(y) == %.2f %+.2f * italic(x)", coefs[1], coefs[2])
    rsq <- cor(dat$displ, dat$hwy)^2
    data.frame(
        formula=eqn,
        r2=sprintf("italic(R^2) == %.2f", rsq),
        stringsAsFactors=FALSE
    )
}
library(plyr) # For the ddply() function
labels <- ddply(mpg, "drv", lm_labels)
labels
drv formula r2
4 italic(y) == 30.68 -2.88 * italic(x) italic(R^2) == 0.65
f italic(y) == 37.38 -3.60 * italic(x) italic(R^2) == 0.36
r italic(y) == 25.78 -0.92 * italic(x) italic(R^2) == 0.04
# Plot with formula and R^2 values
p + geom_smooth(method=lm, se=FALSE) +
geom_text(x=3, y=40, aes(label=formula), data=labels, parse=TRUE, hjust=0) +
geom_text(x=3, y=35, aes(label=r2), data=labels, parse=TRUE, hjust=0)
====================================
# Find r^2 values for each group
labels <- ddply(mpg, "drv", summarise, r2 = cor(displ, hwy)^2)
labels$r2 <- sprintf("italic(R^2) == %.2f", labels$r2)
==================
\ No newline at end of file
chapter: Controlling the Overall Appearance of Graphs
==================
library(gcookbook) # For the data set
p <- ggplot(heightweight, aes(x=ageYear, y=heightIn)) + geom_point()
p + ggtitle("Age and Height of Schoolchildren")
# Use \n for a newline
p + ggtitle("Age and Height\nof Schoolchildren")
====================================
# Move the title inside
p + ggtitle("Age and Height of Schoolchildren") +
theme(plot.title=element_text(vjust = -2.5))
# Use a text annotation instead
p + annotate("text", x=mean(range(heightweight$ageYear)), y=Inf,
label="Age and Height of Schoolchildren", vjust=1.5, size=6)
====================================
library(gcookbook) # For the data set