Code
# A scalar is a single value stored under a name.
xs <- 2 # Make your first scalar
xs # Print the scalar
## [1] 2
In R: scalars, vectors, and matrices are different kinds of “objects”.
These objects are used extensively in data analysis
Vectors are probably your most common object in R, but we will start with scalars.
Make your first scalar
# Assign the value 2 to the name `xs`, then print it.
xs <- 2 # Make your first scalar
xs # Print the scalar
## [1] 2
Perform simple calculations and see how R is doing the math for you
# Simple arithmetic on the scalar `xs` (defined above as 2).
xs + 2
## [1] 4
xs * 2 # Perform and print a simple calculation
## [1] 4
(xs + 1)^2 # Perform and print a simple calculation
## [1] 9
xs + NA # often used for missing values
## [1] NA
Now change `xs`, predict what will happen, then re-run the code.
Make Your First Vector
# Build a vector with c() and index / transform it.
x <- c(0,1,3,10,6) # Your First Vector
x # Print the vector
## [1] 0 1 3 10 6
x[2] # Print the 2nd Element; 1
## [1] 1
x + 2 # Print simple calculation; 2,3,5,12,8
## [1] 2 3 5 12 8
x * 2
## [1] 0 2 6 20 12
x^2
## [1] 0 1 9 100 36
Apply mathematical calculations elementwise
# Vector-with-vector arithmetic is applied element by element
# (uses the vector `x` defined above).
x + x
## [1] 0 2 6 20 12
x * x
## [1] 0 1 9 100 36
x^x
## [1] 1.0000e+00 1.0000e+00 2.7000e+01 1.0000e+10 4.6656e+04
In R, scalars are treated as a vector with one element.
# c() with a single value builds a one-element vector; it prints like a scalar
c(1)
## [1] 1
Sometimes, we will use vectors that are entirely ordered.
# Ordered sequences: integer range and a stepped sequence
1:7
## [1] 1 2 3 4 5 6 7
seq(0,1,by=.1)
## [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0
# Ordering data
sort(x)
## [1] 0 1 3 6 10
# order(x) gives the index permutation; indexing x with it matches sort(x)
x[order(x)]
## [1] 0 1 3 6 10
Matrices are also common objects
# Stack two vectors as rows of a matrix; the row names come from the object names
x1 <- c(1,4,9)
x2 <- c(3,0,2)
x_mat <- rbind(x1, x2)

x_mat # Print full matrix
## [,1] [,2] [,3]
## x1 1 4 9
## x2 3 0 2
x_mat[2,] # Print Second Row
## [1] 3 0 2
x_mat[,2] # Print Second Column
## x1 x2
## 4 0
x_mat[2,2] # Print Element in Second Column and Second Row
## x2
## 0
There are elementwise calculations
# Arithmetic operators act element by element on matrices
# (uses `x_mat` defined above).
x_mat + 2
## [,1] [,2] [,3]
## x1 3 6 11
## x2 5 2 4
x_mat * 2
## [,1] [,2] [,3]
## x1 2 8 18
## x2 6 0 4
x_mat^2
## [,1] [,2] [,3]
## x1 1 16 81
## x2 9 0 4
x_mat + x_mat
## [,1] [,2] [,3]
## x1 2 8 18
## x2 6 0 4
x_mat * x_mat #NOT classical matrix multiplication
## [,1] [,2] [,3]
## x1 1 16 81
## x2 9 0 4
x_mat^x_mat
## [,1] [,2] [,3]
## x1 1 256 387420489
## x2 27 1 4
And you can also use matrix algebra
# Two 2x3 matrices, filled column by column
x_mat1 <- matrix(2:7,2,3)
x_mat1
## [,1] [,2] [,3]
## [1,] 2 4 6
## [2,] 3 5 7
x_mat2 <- matrix(4:-1,2,3)
x_mat2
## [,1] [,2] [,3]
## [1,] 4 2 0
## [2,] 3 1 -1
tcrossprod(x_mat1, x_mat2) #x_mat1 %*% t(x_mat2)
## [,1] [,2]
## [1,] 16 4
## [2,] 22 7
crossprod(x_mat1, x_mat2) #t(x_mat1) %*% x_mat2
## [,1] [,2] [,3]
## [1,] 17 7 -3
## [2,] 31 13 -5
## [3,] 45 19 -7
Functions are applied to objects
# Define a function that adds two to any vector
add_two <- function(input_vector) { #input_vector is a placeholder
    output_vector <- input_vector + 2 # new object defined locally
    return(output_vector) # return new object
}
# Apply that function to a vector
x <- c(0,1,3,10,6)
add_two(input_vector=x) #same as add_two(x)
## [1] 2 3 5 12 8
Common mistakes:
# Mistake 1: using an object that was only defined locally inside a function
print(output_vector)
# This is not available globally
# Mistake 2: seeing "+ add_two(x)" in the bottom console
# means you forgot to close the function with "}"
# press "Escape" and try again
# Mistake 3: misspelled names. Double check your spelling
There are many, many generalizations.
# Add two inputs element by element; a length-one input is recycled
add_vec <- function(input_vector1, input_vector2) {
    output_vector <- input_vector1 + input_vector2
    return(output_vector)
}
add_vec(x,3)
## [1] 3 4 6 13 9
add_vec(x,x)
## [1] 0 2 6 20 12
# Square of the elementwise sum of two inputs
sum_squared <- function(x1, x2) {
    y <- (x1 + x2)^2
    return(y)
}

sum_squared(1, 3)
## [1] 16
sum_squared(x, 2)
## [1] 4 9 25 144 64
sum_squared(x, NA) # NA propagates to every element
## [1] NA NA NA NA NA
sum_squared(x, x)
## [1] 0 4 36 400 144
sum_squared(x, 2*x)
## [1] 0 9 81 900 324
Functions can take functions as arguments. Note that a statistic is defined as a function of data.
# Apply an arbitrary function `f` to the data `x` and return the result
statistic <- function(x,f){
    y <- f(x)
    return(y)
}
statistic(x, mean)
## [1] 4
You can apply functions to matrices
sum_squared(x_mat, x_mat)
## [,1] [,2] [,3]
## x1 4 64 324
## x2 36 0 16
# Apply function to each matrix row
y <- apply(x_mat, 1, sum)^2
# ?apply #checks the function details
# NOTE(review): y has 2 elements and sum_squared(x, x) has 5, so R recycles y
# (with a warning) -- confirm which comparison was intended here
y - sum_squared(x, x) # tests if there are any differences
## [1] 196 21 160 -375 52
There are many possible functions you can apply
# Return Y-value with minimum absolute difference from 3
abs_diff_y <- abs( y - 3 )
abs_diff_y # is this the luckiest number?
## x1 x2
## 193 22
#min(abs_diff_y)
#which.min(abs_diff_y)
# which.min() gives the position of the smallest difference; use it to index y
y[ which.min(abs_diff_y) ]
## x2
## 25
# Build a fixed 12x2 matrix (a sequence and the same sequence shifted by 2)
# and return the function `f` applied to it
fun_of_seq <- function(f){
    x1 <- seq(1,3, length.out=12)
    x2 <- x1+2
    x <- cbind(x1,x2)
    y <- f(x)
    return(y)
}
fun_of_seq(mean)
## [1] 3
fun_of_seq(sd)
## [1] 1.206045
There are also some useful built in functions
# 2x3 matrix filled row by row: second row is twice the first
m <- matrix(c(1:3,2*(1:3)),byrow=TRUE,ncol=3)
m
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 2 4 6
# normalize rows
m/rowSums(m)
## [,1] [,2] [,3]
## [1,] 0.1666667 0.3333333 0.5
## [2,] 0.1666667 0.3333333 0.5
# normalize columns
t(t(m)/colSums(m))
## [,1] [,2] [,3]
## [1,] 0.3333333 0.3333333 0.3333333
## [2,] 0.6666667 0.6666667 0.6666667
# de-mean rows
sweep(m,1,rowMeans(m), '-')
## [,1] [,2] [,3]
## [1,] -1 0 1
## [2,] -2 0 2
# de-mean columns
sweep(m,2,colMeans(m), '-')
## [,1] [,2] [,3]
## [1,] -0.5 -1 -1.5
## [2,] 0.5 1 1.5
Applying the same function over and over again
#Create empty vector (numeric, so the type matches what the loop stores)
exp_vector <- numeric(3)
#Fill empty vector
for(i in seq_along(exp_vector)){
    exp_vector[i] <- exp(i)
}
# Compare
exp_vector
## [1] 2.718282 7.389056 20.085537
c( exp(1), exp(2), exp(3))
## [1] 2.718282 7.389056 20.085537
A more complicated example
# Combine a power, the mean of the integer range j:i, and a log into one value
complicated_fun <- function(i, j=0){
    x <- i^(i-1)
    y <- x + mean( j:i )
    z <- log(y)/i
    return(z)
}
# Evaluate the function at i = 1, ..., 10 and store each result
complicated_vector <- vector(length=10)
for(i in 1:10){
    complicated_vector[i] <- complicated_fun(i)
}
A recursive example
# Each element depends on the previous one, so the loop must run in order
x <- vector(length=4)
x[1] <- 1
for(i in 2:4){
    x[i] <- (x[i-1]+1)^2
}
x
## [1] 1 4 25 676
TRUE/FALSE
# Comparisons are elementwise; NA stays NA in the result
x <- c(1,2,3,NA)
x > 2
## [1] FALSE FALSE TRUE NA
x==2
## [1] FALSE TRUE FALSE NA
any(x==2)
## [1] TRUE
all(x==2)
## [1] FALSE
2 %in% x
## [1] TRUE
2==TRUE
## [1] FALSE
2==FALSE
## [1] FALSE
is.numeric(x)
## [1] TRUE
is.na(x)
## [1] FALSE FALSE FALSE TRUE
The “&” and “|” commands are logical calculations that compare vectors to the left and right.
# `&` and `|` work elementwise; a length-one side is recycled
x <- 1:3
is.numeric(x) & (x < 2)
## [1] TRUE FALSE FALSE
is.numeric(x) | (x < 2)
## [1] TRUE TRUE TRUE
# Inside if(), use the scalar `&&`: it short-circuits, so x[5] is only
# inspected when the length check has already passed
if(length(x) >= 5 && x[5] > 12) print("ok")
factorial(4)
## [1] 24
choose(4,2)
## [1] 6
# Bin the values 1..10 into 4 equal-width intervals
x <- 1:10
cut(x, 4)
## [1] (0.991,3.25] (0.991,3.25] (0.991,3.25] (3.25,5.5] (3.25,5.5]
## [6] (5.5,7.75] (5.5,7.75] (7.75,10] (7.75,10] (7.75,10]
## Levels: (0.991,3.25] (3.25,5.5] (5.5,7.75] (7.75,10]
# Group the values by the interval they fall into
split(x, cut(x, 4))
## $`(0.991,3.25]`
## [1] 1 2 3
##
## $`(3.25,5.5]`
## [1] 4 5
##
## $`(5.5,7.75]`
## [1] 6 7
##
## $`(7.75,10]`
## [1] 8 9 10
# Mean within each group
xs <- split(x, cut(x, 4))
sapply(xs, mean)
## (0.991,3.25] (3.25,5.5] (5.5,7.75] (7.75,10]
## 2.0 4.5 6.5 9.0
# shortcut
aggregate(x, list(cut(x,4)), mean)
## Group.1 x
## 1 (0.991,3.25] 2.0
## 2 (3.25,5.5] 4.5
## 3 (5.5,7.75] 6.5
## 4 (7.75,10] 9.0