centering and standardizing with scale()
Welcome to some handy functions! These are quick ways to get some common tasks done: centering, standardizing, and getting summary statistics (e.g., the mean) for each level of a factor.
# Browse the datasets that ship with the attached packages.
data()
# ChickWeight looks fun (chickens!), so take a working copy of it.
df <- datasets::ChickWeight
# Get a feel for it: column types, per-column summaries, and the first rows.
utils::str(df)
summary(object = df)
head(df, n = 6L)
# ------------------- #
#      centering      #
# ------------------- #
# Centering subtracts the overall mean from every value: the new variable has
# mean 0 but keeps the original units and spread.
?scale
# scale() returns a one-column matrix (carrying a "scaled:center" attribute)
# even when given a plain vector. as.vector() strips the dim and attributes so
# the new column is an ordinary numeric column rather than an embedded matrix.
df$weight.c <- as.vector(scale(df$weight, center = TRUE, scale = FALSE))
hist(df$weight.c)
# ---------------------------- #
#      scaling (z scores)      #
# ---------------------------- #
# Standardizing centers on the mean and then divides by the standard
# deviation, giving z scores (mean 0, sd 1).
# scale() returns a one-column matrix (carrying "scaled:center" and
# "scaled:scale" attributes) even when given a plain vector. as.vector()
# strips the dim and attributes so the new column is an ordinary numeric
# column rather than an embedded matrix.
df$weight.z <- as.vector(scale(df$weight, center = TRUE, scale = TRUE))
hist(df$weight.z)
# ----------------------------------- #
#      within levels of a factor      #
# ----------------------------------- #
# lots of great ways to do this, here are two (there are so many more!)
# strategy number 1: ave() returns a vector as long as its input, where each
# value is replaced by the result of FUN computed within that value's group
?ave
# FUN defaults to mean, so every row gets its own chick's mean weight
df$ave.weight <- ave(df$weight, df$Chick)
head(df, n = 15)
# you don't have to stick with the mean. you can put in any function you like.
df$max.weight <- ave(df$weight, df$Chick, FUN = max)
# you can center within levels of a factor (subtract each chick's own mean)...
df$weight.c.within <- ave(df$weight, df$Chick, FUN = function(x) x - mean(x))
# ...or standardize within levels: scale() centers AND scales by default, so
# this gives z scores based on each chick's own mean and sd
df$weight.z.within <- ave(df$weight, df$Chick, FUN = scale)
head(df, n = 15)
# strategy number 2: by() splits the data by a factor and applies FUN to
# each piece
?by
hist(
  x = by(df$weight, df$Chick, FUN = mean),
  main = "How heavy are those chickens??"
)
# by() gives back a single mean per chick, so its length equals the number
# of distinct chicks rather than the number of rows in the data:
length(unique(df$Chick))
length(by(df$weight, df$Chick, FUN = mean))
nrow(df)
# FUN can be any function you like, e.g. each chick's maximum weight
hist(
  x = by(df$weight, df$Chick, FUN = max),
  main = "What's the fattest those chickens get??"
)