7 Examples of coding statistical operations

require(stats)
require(datasets)
require(utils)

7.1 Z-scores

scores <- c(76,80,83,97,100)
?scale
scaled.scores <- scale(scores)
hist(scores)
scores.data.frame <- data.frame(scores,scaled.scores)
scores.data.frame
plot(x = scores.data.frame$scores, 
     y = scores.data.frame$scaled.scores, 
     type = "b",
     xlab = "Raw Test Scores",
     ylab = "Scaled Test Scores")
abline(h = 0)
mean(scores)
abline(v = mean(scores))
z.scores <- c(scores - mean(scores))/sd(scores)
scores.data.frame <- data.frame(scores,scaled.scores,z.scores)
scores.data.frame

7.2 Quantiles

Here is an example using the quantile() function:

?quantile
nile.vect <- Nile
hist(nile.vect, main = "", xlab = "Nile river discharge volume")
quantile(x = nile.vect)
abline(v = quantile(x = nile.vect), col = "red", lwd = 2)

7.3 Normality

Shaprio-Wilk test of normality, the null hypothesis - data are normally distributed

?shapiro.test
?rbeta
test.dat.1 <- rbeta(n = 500, shape1 = 0.5, shape2 = 5)
hist(x = test.dat.1)
# Test if these data are normally distributed
shapiro.test(x = test.dat.1)
test.dat.2 <- rnorm(n = 500, mean = 0, sd = 1)
hist(x = test.dat.2)
# Test if these data are normally distributed
shapiro.test(x = test.dat.2)

7.4 t-test

head(ToothGrowth,15)
un.dose <- unique(ToothGrowth$dose)
ind.1 <- which(ToothGrowth$dose == un.dose[1])
ind.2 <- which(ToothGrowth$dose == un.dose[2])

t.test(ToothGrowth$len[ind.1], ToothGrowth$len[ind.2])

7.5 Linear Models

The lm function can be used to create a simple regression model. The rlm()` function accepts a number of arguments:

  • formula: describes the model
    • This is “YVAR ~ XVAR”
    • YVAR is the dependent (predicted)
    • XVAR is the independent (predictor)
# Load the UNM_Enroll.csv data file
plot(x = mtcars$wt, 
     y = mtcars$mpg,
     xlab = "Weight", 
     ylab = "mpg", 
     type = "p", 
     las = F)

model.object <- mtcars$mpg ~ mtcars$wt
model.object
lm(model.object)
  • The intercept is 37.285 and the coefficient for the unemployment rate is -5.344
  • Therefore, the complete regression equation is rmpg = 37.285 + -5.344 x Weight`.
# NHST
anova(lm(model.object))
summary(lm(model.object))
pred.y.vals <- predict(lm(model.object))
plot(x = mtcars$wt, 
     y = mtcars$mpg,
     xlab = "Weight", 
     ylab = "mpg", 
     type = "p", 
     las = F)
lines(x = mtcars$wt, pred.y.vals)

7.6 Analysis of Variance

# ANOVAs are linear models with categorical predictors
head(ToothGrowth)
anova.model.object <- ToothGrowth$len ~ ToothGrowth$dose
anova(lm(anova.model.object))