7 Examples of coding statistical operations
require(stats)
require(datasets)
require(utils)
7.1 Z-scores
<- c(76,80,83,97,100)
scores
?scale<- scale(scores)
scaled.scores hist(scores)
<- data.frame(scores,scaled.scores)
scores.data.frame
scores.data.frameplot(x = scores.data.frame$scores,
y = scores.data.frame$scaled.scores,
type = "b",
xlab = "Raw Test Scores",
ylab = "Scaled Test Scores")
abline(h = 0)
mean(scores)
abline(v = mean(scores))
<- c(scores - mean(scores))/sd(scores)
z.scores <- data.frame(scores,scaled.scores,z.scores)
scores.data.frame scores.data.frame
7.2 Quantiles
Here is an example using the quantile() function:
?quantile<- Nile
nile.vect hist(nile.vect, main = "", xlab = "Nile river discharge volume")
quantile(x = nile.vect)
abline(v = quantile(x = nile.vect), col = "red", lwd = 2)
7.3 Normality
Shaprio-Wilk test of normality, the null hypothesis - data are normally distributed
?shapiro.test
?rbeta.1 <- rbeta(n = 500, shape1 = 0.5, shape2 = 5)
test.dathist(x = test.dat.1)
# Test if these data are normally distributed
shapiro.test(x = test.dat.1)
.2 <- rnorm(n = 500, mean = 0, sd = 1)
test.dathist(x = test.dat.2)
# Test if these data are normally distributed
shapiro.test(x = test.dat.2)
7.4 t-test
head(ToothGrowth,15)
<- unique(ToothGrowth$dose)
un.dose .1 <- which(ToothGrowth$dose == un.dose[1])
ind.2 <- which(ToothGrowth$dose == un.dose[2])
ind
t.test(ToothGrowth$len[ind.1], ToothGrowth$len[ind.2])
7.5 Linear Models
The lm function can be used to create a simple regression model.
The r
lm()` function accepts a number of arguments:
- formula: describes the model
- This is “YVAR ~ XVAR”
- YVAR is the dependent (predicted)
- XVAR is the independent (predictor)
# Load the UNM_Enroll.csv data file
plot(x = mtcars$wt,
y = mtcars$mpg,
xlab = "Weight",
ylab = "mpg",
type = "p",
las = F)
<- mtcars$mpg ~ mtcars$wt
model.object
model.objectlm(model.object)
- The intercept is 37.285 and the coefficient for the unemployment rate is -5.344
- Therefore, the complete regression equation is
r
mpg = 37.285 + -5.344 x Weight`.
# NHST
anova(lm(model.object))
summary(lm(model.object))
<- predict(lm(model.object))
pred.y.vals plot(x = mtcars$wt,
y = mtcars$mpg,
xlab = "Weight",
ylab = "mpg",
type = "p",
las = F)
lines(x = mtcars$wt, pred.y.vals)