Functions and iterative operations

MACS 33001 University of Chicago

Functions

Function components

  • Name
  • Arguments
  • Body

Rescale function

# Rescale a numeric vector so its finite values span [0, 1].
#
# Arguments:
#   x: a numeric vector.
# Returns:
#   A numeric vector the same length as `x`. The smallest finite value maps
#   to 0 and the largest finite value maps to 1. NA stays NA, and -Inf / Inf
#   stay -Inf / Inf. If all finite values are equal the denominator is zero,
#   so those elements come back as NaN.
rescale01 <- function(x) {
  # finite = TRUE leaves NA and +/-Inf out of the range, so one infinite
  # value cannot become an endpoint and wipe out the finite values.
  rng <- range(x, na.rm = TRUE, finite = TRUE)
  (x - rng[1]) / (rng[2] - rng[1])
}

# the smallest input maps to 0, the largest to 1, the midpoint to 0.5
rescale01(c(0, 5, 10))
## [1] 0.0 0.5 1.0
# a different input range gives the same rescaled values
rescale01(c(-10, 0, 10))
## [1] 0.0 0.5 1.0
# the NA is ignored when computing the range and stays NA in the result
rescale01(c(1, 2, 3, NA, 5))
## [1] 0.00 0.25 0.50   NA 1.00
  • Name
  • Arguments
  • Body

What is that?

# Length of the hypotenuse of a right triangle with legs `a` and `b`.
#
# Arguments:
#   a, b: numeric leg lengths.
# Returns:
#   sqrt(a^2 + b^2).
pythagorean <- function(a, b) {
  squared_legs <- a^2 + b^2
  # `hypotenuse` is a local variable; it exists only while the function runs
  hypotenuse <- sqrt(squared_legs)
  hypotenuse
}
  • Name
  • Arguments
  • Body

How to use a function

# print the output of the function
pythagorean(a = 3, b = 4)
## [1] 5
# save the output as a new object
# (wrapping the assignment in parentheses also prints the assigned value)
(tri_c <- pythagorean(a = 3, b = 4))
## [1] 5
# what happens to the hypotenuse from inside the function?
pythagorean(a = 3, b = 4)
## [1] 5
# `hypotenuse` was assigned inside pythagorean(), so it does not exist out
# here; looking it up is an error
hypotenuse
## Error in eval(expr, envir, enclos): object 'hypotenuse' not found

Iteration

# build a tibble with four columns of 10 random normal draws each
# NOTE(review): assumes tibble() is attached earlier (e.g. via tidyverse) -
# not shown here. No seed is set in the code shown, so the printed values
# below will differ from run to run.
df <- tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)
# compute the median of each column by repeating the same call four times
median(df$a)
## [1] -0.556
median(df$b)
## [1] -0.494
median(df$c)
## [1] -0.466
median(df$d)
## [1] -0.605

Iteration with for loop

# output: allocate a double vector with one slot per column of df
output <- vector(mode = "double", length = ncol(df))
# sequence: i takes each column position in turn
for (i in seq_along(df)) {
  # body: store the median of column i in slot i
  output[[i]] <- median(df[[i]])
}
# print the collected medians
output
## [1] -0.556 -0.494 -0.466 -0.605

Output

# the result vector is created at its full length before the loop runs
output <- vector(mode = "double", length = ncol(df))
# vector() fills the requested length with the "empty" value of the mode:
# 0 for double
vector(mode = "double", length = ncol(df))
## [1] 0 0 0 0
# FALSE for logical
vector(mode = "logical", length = ncol(df))
## [1] FALSE FALSE FALSE FALSE
# the empty string for character
vector(mode = "character", length = ncol(df))
## [1] "" "" "" ""
# NULL elements for list
vector(mode = "list", length = ncol(df))
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL

Sequence

# fragment of the loop header `for (i in seq_along(df))`: the values i takes
i in seq_along(df)
# seq_along(df) yields one index per column of df
seq_along(df)
## [1] 1 2 3 4

Body

# runs once per value of i: compute the median of column i of df and store
# it in position i of output
output[[i]] <- median(df[[i]])

Preallocation

# 1000 draws from a normal distribution with mean 0 and sd 1, used as the
# benchmark input
x <- rnorm(1000, mean = 0, sd = 1)
str(x)
##  num [1:1000] 1.449 -1.069 -0.855 -0.281 -0.994 ...
# load microbenchmark library to time code
library(microbenchmark)

# time two ways of computing x + 1 one element at a time, then plot the
# timings on a log-scaled axis
# NOTE(review): %>%, autoplot, scale_y_log10 and labs are not loaded in the
# code shown - presumably magrittr/ggplot2 are attached earlier; confirm
microbenchmark(
  # don't preallocate
  `No preallocation` = {
    # start from a zero-length vector
    output <- vector("numeric", 0)
    
    for (i in seq_along(x)) {
      # c() builds a new, one-element-longer vector on every iteration
      output <- c(output, x[[i]] + 1)
    }
  },
  # preallocate
  `Preallocation` = {
    # create the full-length result once, before the loop
    output <- vector("numeric", length(x))
    
    for (i in seq_along(x)) {
      # write into the existing slot instead of growing the vector
      output[[i]] <- x[[i]] + 1
    }
  }) %>%
  autoplot +
  scale_y_log10(breaks = c(2, 4, 8, 16, 32)) +
  labs(y = "Time [milliseconds]")

Map functions

  • Why for loops are good
  • Why map() functions may be better
  • Types of map() functions
    • map() makes a list
    • map_lgl() makes a logical vector
    • map_int() makes an integer vector
    • map_dbl() makes a double vector
    • map_chr() makes a character vector

Map functions

# apply one summary function to every column of df; the result is a double
# vector named after the columns
map_dbl(df, mean)
##      a      b      c      d 
## -0.383 -0.118 -0.388 -0.766
# swapping the function is the only change needed for a different summary
map_dbl(df, median)
##      a      b      c      d 
## -0.556 -0.494 -0.466 -0.605
map_dbl(df, sd)
##     a     b     c     d 
## 0.996 1.067 0.666 0.894

Map functions

# arguments after the function (here na.rm = TRUE) are passed on to mean()
# for every column
map_dbl(df, mean, na.rm = TRUE)
##      a      b      c      d 
## -0.383 -0.118 -0.388 -0.766

Map functions

# pipe form: df is supplied as the first argument of map_dbl(), so this is
# equivalent to map_dbl(df, mean, na.rm = TRUE)
df %>%
  map_dbl(mean, na.rm = TRUE)
##      a      b      c      d 
## -0.383 -0.118 -0.388 -0.766