Code
# Create a scalar (a single number) and store it under the name `xs`
xs <- 2 # Make your first scalar
xs # Print the scalar
## [1] 2
In R, scalars, vectors, and matrices are different kinds of “objects”.
These objects are used extensively in data analysis.
Vectors are probably your most common object in R, but we will start with scalars (which are treated as a special case in R).
Make your first scalar
# Create a scalar (a single number) and store it under the name `xs`
xs <- 2 # Make your first scalar
xs # Print the scalar
## [1] 2
Perform simple calculations and see how R is doing the math for you
# Arithmetic on the scalar `xs`; each result is printed, `xs` itself is unchanged
xs + 2
## [1] 4
xs * 2 # Perform and print a simple calculation
## [1] 4
(xs + 1)^2 # Perform and print a simple calculation
## [1] 9
xs + NA # often used for missing values
## [1] NA
Now change xs, predict what will happen, then re-run the code.
Make Your First Vector
# Combine several numbers into one vector with c()
x <- c(0, 1, 3, 10, 6) # Your First Vector
x # Print the vector
## [1]  0  1  3 10  6
x[2] # Print the 2nd Element; 1
## [1] 1
# Arithmetic with a single number is applied to every element of the vector
x + 2 # Print simple calculation; 2,3,5,12,8
## [1]  2  3  5 12  8
x * 2
## [1]  0  2  6 20 12
x^2
## [1]   0   1   9 100  36
Apply Mathematical calculations “elementwise”
# With two vectors of equal length, each operation pairs up the elements:
# the i-th result uses the i-th element of both operands
x + x
## [1]  0  2  6 20 12
x * x
## [1]   0   1   9 100  36
x^x
## [1] 1.0000e+00 1.0000e+00 2.7000e+01 1.0000e+10 4.6656e+04
See that scalars are vectors
# A single number wrapped in c() prints exactly like a one-element vector
c(1)
## [1] 1
# The colon operator builds a run of consecutive integers
1:7
## [1] 1 2 3 4 5 6 7
# seq() builds an evenly spaced sequence with the requested step size
seq(from = 0, to = 1, by = 0.1)
##  [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0
Matrices are also common objects
# Build a matrix by stacking two vectors as rows; the row names come from
# the names of the objects passed to rbind()
x1 <- c(1, 4, 9)
x2 <- c(3, 0, 2)
x_mat <- rbind(x1, x2)

x_mat # Print full matrix
##    [,1] [,2] [,3]
## x1    1    4    9
## x2    3    0    2
# Index with [row, column]; leaving one side empty selects everything along it
x_mat[2, ] # Print Second Row
## [1] 3 0 2
x_mat[, 2] # Print Second Column
## x1 x2 
##  4  0
x_mat[2, 2] # Print Element in Second Column and Second Row
## x2 
##  0
There are elementwise calculations
# Arithmetic with a single number is applied to every cell of the matrix
x_mat + 2
##    [,1] [,2] [,3]
## x1    3    6   11
## x2    5    2    4
x_mat * 2
##    [,1] [,2] [,3]
## x1    2    8   18
## x2    6    0    4
x_mat^2
##    [,1] [,2] [,3]
## x1    1   16   81
## x2    9    0    4
# With two matrices of the same shape, the operators work cell by cell
# (note that `*` is NOT matrix multiplication)
x_mat + x_mat
##    [,1] [,2] [,3]
## x1    2    8   18
## x2    6    0    4
x_mat * x_mat
##    [,1] [,2] [,3]
## x1    1   16   81
## x2    9    0    4
x_mat^x_mat
##    [,1] [,2]      [,3]
## x1    1  256 387420489
## x2   27    1         4
And you can also use matrix algebra
# matrix(data, nrow, ncol) fills the matrix column by column
x_mat1 <- matrix(2:7, 2, 3)
x_mat1
##      [,1] [,2] [,3]
## [1,]    2    4    6
## [2,]    3    5    7
x_mat2 <- matrix(4:-1, 2, 3)
x_mat2
##      [,1] [,2] [,3]
## [1,]    4    2    0
## [2,]    3    1   -1
# Matrix products: tcrossprod(a, b) is a %*% t(b), crossprod(a, b) is t(a) %*% b
tcrossprod(x_mat1, x_mat2) # x_mat1 %*% t(x_mat2)
##      [,1] [,2]
## [1,]   16    4
## [2,]   22    7
crossprod(x_mat1, x_mat2) # t(x_mat1) %*% x_mat2
##      [,1] [,2] [,3]
## [1,]   17    7   -3
## [2,]   31   13   -5
## [3,]   45   19   -7
# Compare with the elementwise product:
# x_mat1 * x_mat2
Functions are applied to objects
# Define a function that adds two to any vector
#   input_vector: a numeric vector (or scalar)
#   returns:      a vector of the same length with 2 added to every element
add_2 <- function(input_vector) {
  output_vector <- input_vector + 2 # new object defined locally
  return(output_vector) # return new object
}
# Apply that function to a vector
x <- c(0, 1, 3, 10, 6)
add_2(x)
## [1]  2  3  5 12  8
# notice 'output_vector' is not available here
There are many, many generalizations
# Add two inputs element by element
#   input_vector1, input_vector2: numeric vectors (or scalars)
#   returns: their elementwise sum; a scalar is added to every element
add_vec <- function(input_vector1, input_vector2) {
  output_vector <- input_vector1 + input_vector2
  return(output_vector)
}
add_vec(x, 3)
## [1]  3  4  6 13  9
add_vec(x, x)
## [1]  0  2  6 20 12
# Square of the elementwise sum of two inputs
#   x1, x2:  numeric vectors, matrices, or scalars
#   returns: (x1 + x2)^2, computed element by element; NA inputs give NA
sum_squared <- function(x1, x2) {
  y <- (x1 + x2)^2
  return(y)
}

sum_squared(1, 3)
## [1] 16
sum_squared(x, 2)
## [1]   4   9  25 144  64
sum_squared(x, NA)
## [1] NA NA NA NA NA
sum_squared(x, x)
## [1]   0   4  36 400 144
sum_squared(x, 2 * x)
## [1]   0   9  81 900 324
Functions can take functions as arguments
# Apply a user-supplied function to a fixed sequence
#   f:       a function that accepts a numeric vector (e.g. mean, sd)
#   returns: whatever `f` returns for 12 evenly spaced points from 1 to 3
fun_of_seq <- function(f) {
  x <- seq(1, 3, length.out = 12)
  y <- f(x)
  return(y)
}

fun_of_seq(mean)
## [1] 2
fun_of_seq(sd)
## [1] 0.6555548
You can apply functions to matrices
# Functions written for vectors also work cell by cell on matrices
sum_squared(x_mat, x_mat)
##    [,1] [,2] [,3]
## x1    4   64  324
## x2   36    0   16
# Apply function to each matrix row
y <- apply(x_mat, 1, sum)^2
# ?apply  #checks the function details
# NOTE(review): `y` has one entry per row of `x_mat`, while the printed result
# below has five entries, so `y` appears to be recycled against the longer
# `sum_squared(x, x)` -- confirm this comparison is the intended one
y - sum_squared(x, x) # tests if there are any differences
## [1]  196   21  160 -375   52
There are many possible functions you can apply
# Return Y-value with minimum absolute difference from 3
abs_diff_y <- abs(y - 3)
abs_diff_y # is this the luckiest number?
##  x1  x2 
## 193  22
#min(abs_diff_y)
#which.min(abs_diff_y)
# which.min() gives the position of the smallest distance; use it to index `y`
y[which.min(abs_diff_y)]
## x2 
## 25
There are also some useful built in functions
# byrow = TRUE fills the matrix one row at a time instead of column by column
m <- matrix(c(1:3, 2 * (1:3)), byrow = TRUE, ncol = 3)
m
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    2    4    6
# normalize rows
# (rowSums(m) has one entry per row, so it lines up with the rows of `m`)
m / rowSums(m)
##           [,1]      [,2] [,3]
## [1,] 0.1666667 0.3333333  0.5
## [2,] 0.1666667 0.3333333  0.5
# normalize columns
# (transpose first so that the column sums line up, then transpose back)
t(t(m) / colSums(m))
##           [,1]      [,2]      [,3]
## [1,] 0.3333333 0.3333333 0.3333333
## [2,] 0.6666667 0.6666667 0.6666667
# de-mean rows
sweep(m, 1, rowMeans(m), "-")
##      [,1] [,2] [,3]
## [1,]   -1    0    1
## [2,]   -2    0    2
# de-mean columns
sweep(m, 2, colMeans(m), "-")
##      [,1] [,2] [,3]
## [1,] -0.5   -1 -1.5
## [2,]  0.5    1  1.5
Applying the same function over and over again
# Preallocate a numeric vector, then fill it one element at a time in a loop
exp_vector <- numeric(3)
for (i in seq_along(exp_vector)) {
  exp_vector[i] <- exp(i)
}

# Compare the loop result with the direct calculation
exp_vector
## [1]  2.718282  7.389056 20.085537
c(exp(1), exp(2), exp(3))
## [1]  2.718282  7.389056 20.085537
Storing the results of a more complicated example
# A multi-step calculation wrapped in a function
#   i:       a single number
#   j:       start of the sequence j:i that is averaged (default 0)
#   returns: log(i^(i - 1) + mean(j:i)) / i
complicate_fun <- function(i, j = 0) {
  x <- i^(i - 1)
  y <- x + mean(j:i)
  z <- log(y) / i
  return(z)
}

# Preallocate the result vector, then store one function value per iteration
complicated_vector <- numeric(10)
for (i in seq_along(complicated_vector)) {
  complicated_vector[i] <- complicate_fun(i)
}
recursive loop
# Each element is computed from the previous one, so the first value must be
# set by hand and the loop starts at the second position
x <- numeric(4)
x[1] <- 1
for (i in 2:4) {
  x[i] <- (x[i - 1] + 1)^2
}
x
## [1]   1   4  25 676
Basic Logic
# Comparisons are evaluated element by element; a missing value (NA) stays NA
x <- c(1, 2, 3, NA)
x > 2
## [1] FALSE FALSE  TRUE    NA
x == 2
## [1] FALSE  TRUE FALSE    NA
# any()/all() collapse a logical vector into a single answer
any(x == 2)
## [1] TRUE
all(x == 2)
## [1] FALSE
# %in% tests membership
2 %in% x
## [1] TRUE
is.numeric(x)
## [1] TRUE
# is.na() is the way to locate missing values
is.na(x)
## [1] FALSE FALSE FALSE  TRUE
The “&” and “|” commands are logical calculations that compare vectors to the left and right.
# `&` (and) and `|` (or) combine logical vectors element by element;
# the single value from is.numeric(x) is recycled across (x < 2)
x <- 1:3
is.numeric(x) & (x < 2)
## [1]  TRUE FALSE FALSE
is.numeric(x) | (x < 2)
## [1] TRUE TRUE TRUE
# An `if` condition must be a single TRUE/FALSE, so use the scalar,
# short-circuiting `&&`: x[5] is only examined when x has at least 5 elements
if (length(x) >= 5 && x[5] > 12) print("ok")
Advanced Logic.
# cut() assigns each value to one of 4 equal-width intervals (a factor)
x <- 1:10
cut(x, 4)
##  [1] (0.991,3.25] (0.991,3.25] (0.991,3.25] (3.25,5.5]   (3.25,5.5]  
##  [6] (5.5,7.75]   (5.5,7.75]   (7.75,10]    (7.75,10]    (7.75,10]   
## Levels: (0.991,3.25] (3.25,5.5] (5.5,7.75] (7.75,10]
# split() groups the values of `x` by interval, returning a named list
split(x, cut(x, 4))
## $`(0.991,3.25]`
## [1] 1 2 3
## 
## $`(3.25,5.5]`
## [1] 4 5
## 
## $`(5.5,7.75]`
## [1] 6 7
## 
## $`(7.75,10]`
## [1]  8  9 10
# Store the groups, then compute one mean per group
xs <- split(x, cut(x, 4))
sapply(xs, mean)
## (0.991,3.25]   (3.25,5.5]   (5.5,7.75]    (7.75,10] 
##          2.0          4.5          6.5          9.0
# shortcut
aggregate(x, list(cut(x, 4)), mean)
##        Group.1   x
## 1 (0.991,3.25] 2.0
## 2   (3.25,5.5] 4.5
## 3   (5.5,7.75] 6.5
## 4    (7.75,10] 9.0
see https://bookdown.org/rwnahhas/IntroToR/logical.html
Arrays are generalizations of matrices. They are often used in spatial econometrics, and are a very efficient way to store numeric data with the same dimensions.
# A 2 x 3 x 4 array: 2 rows, 3 columns, 4 "layers", filled with 1..24
a <- array(data = 1:24, dim = c(2, 3, 4))
a

# Index with [row, column, layer]; drop = FALSE keeps all three dimensions
a[1, , , drop = FALSE]  # Row 1
#a[, 1, , drop = FALSE]  # Column 1
#a[, , 1, drop = FALSE]  # Layer 1

a[1, 1, ]  # Row 1, column 1
#a[1, , 1]  # Row 1, "layer" 1
#a[, 1, 1]  # Column 1, "layer" 1

a[1, 1, 1]  # Row 1, column 1, "layer" 1
Apply extends to arrays
# MARGIN picks the dimension(s) to keep; FUN is applied over everything else
apply(a, MARGIN = 1, FUN = mean)  # Row means
## [1] 12 13
apply(a, MARGIN = 2, FUN = mean)  # Column means
## [1] 10.5 12.5 14.5
apply(a, MARGIN = 3, FUN = mean)  # "Layer" means
## [1]  3.5  9.5 15.5 21.5
apply(a, MARGIN = 1:2, FUN = mean)  # Row/Column combination
##      [,1] [,2] [,3]
## [1,]   10   12   14
## [2,]   11   13   15
Outer products yield arrays
# The outer product of two vectors holds every pairwise product x[i] * x[j]
x <- c(1, 2, 3)
x_mat1 <- outer(x, x) # x %o% x
x_mat1
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    2    4    6
## [3,]    3    6    9
is.array(x_mat) # Matrices are arrays
## [1] TRUE
# The outer product of a matrix and a vector is a 3-dimensional array:
# one "layer" per element of the vector
x_mat2 <- matrix(6:1, 2, 3)
outer(x_mat2, x)
## , , 1
## 
##      [,1] [,2] [,3]
## [1,]    6    4    2
## [2,]    5    3    1
## 
## , , 2
## 
##      [,1] [,2] [,3]
## [1,]   12    8    4
## [2,]   10    6    2
## 
## , , 3
## 
##      [,1] [,2] [,3]
## [1,]   18   12    6
## [2,]   15    9    3
# outer(x_mat2, matrix(x))
# outer(x_mat2, t(x))
# outer(x_mat1, x_mat2)