# readr's parse_*() functions take a character vector and convert it to the
# requested type, so every input below is written as strings.
# The string "NA" is parsed as a missing value.
parse_logical(c("TRUE", "TRUE", "FALSE", "TRUE", "NA"))
## [1] TRUE TRUE FALSE TRUE NA
parse_integer(c("1", "5", "3", "4", "12423"))
## [1] 1 5 3 4 12423
parse_double(c("4.2", "4", "6", "53.2"))
## [1] 4.2 4.0 6.0 53.2
parse_character(c("Goodnight Moon", "Runaway Bunny", "Big Red Barn"))
## [1] "Goodnight Moon" "Runaway Bunny" "Big Red Barn"
# Random permutation of 1..10. No seed is set in this excerpt, so the values
# shown in the output comments will differ from run to run.
(x <- sample.int(10))
## [1] 2 6 5 8 9 4 1 7 10 3
# Adding a length-10 vector of 100s, written out explicitly ...
x + rep(100, times = 10)
## [1] 102 106 105 108 109 104 101 107 110 103
# ... gives the same result as adding the scalar 100, which R recycles
# across every element of x.
x + 100
## [1] 102 106 105 108 109 104 101 107 110 103
# Two integer sequences of different lengths: x1 is 1, 2 and x2 is 1, ..., 10
(x1 <- seq_len(2))
## [1] 1 2
(x2 <- seq_len(10))
## [1] 1 2 3 4 5 6 7 8 9 10
# Adding them recycles the shorter vector (x1) along the longer one (x2):
# 1+1, 2+2, 1+3, 2+4, ...
x1 + x2
## [1] 2 4 4 6 6 8 8 10 10 12
x <- c("one", "two", "three", "four", "five")
# With positive integers: keep the elements at those positions, in that order
x[c(3, 2, 5)]
## [1] "three" "two" "five"
# With negative integers: drop the elements at those positions
x[c(-1, -3, -5)]
## [1] "two" "four"
# Don't mix positive and negative: doing so is an error. try() prints the
# error message without stopping the statements that follow.
try(x[c(-1, 1)])
## Error in x[c(-1, 1)]: only 0's may be mixed with negative subscripts
(x <- c(10, 3, NA, 5, 8, 1, NA))
## [1] 10 3 NA 5 8 1 NA
# Logical mask: TRUE where x holds a value, FALSE where it is NA
!is.na(x)
## [1] TRUE TRUE FALSE TRUE TRUE TRUE FALSE
# Subsetting with that mask keeps only the non-missing values
x[!is.na(x)]
## [1] 10 3 5 8 1
# Even values of x. The comparison is NA wherever x is NA, and an NA index
# returns NA, so the missing values are carried into the result as well.
x[(x %% 2) == 0]
## [1] 10 NA 8 NA
# A list prints one element at a time, each under its own [[i]] header
x <- list(1, 2, 3)
x
## [[1]]
## [1] 1
##
## [[2]]
## [1] 2
##
## [[3]]
## [1] 3
# str(): a compact summary of the list's structure (length and the type of
# each element) instead of its full contents
str(x)
## List of 3
## $ : num 1
## $ : num 2
## $ : num 3
# Named list: str() labels each element with its name
x_named <- setNames(list(1, 2, 3), c("a", "b", "c"))
str(x_named)
## List of 3
## $ a: num 1
## $ b: num 2
## $ c: num 3
# A list may mix types: character, integer, double and logical here
y <- list(
  "a",
  1L,
  1.5,
  TRUE
)
str(y)
## List of 4
## $ : chr "a"
## $ : int 1
## $ : num 1.5
## $ : logi TRUE
# Lists can contain other lists; str() indents each nesting level
z <- list(
  list(1, 2),
  list(3, 4)
)
str(z)
## List of 2
## $ :List of 2
## ..$ : num 1
## ..$ : num 2
## $ :List of 2
## ..$ : num 3
## ..$ : num 4
# str() also works on data frames: it reports the classes, the dimensions,
# and one line per column with its type and first few values.
# NOTE(review): `diamonds` is not defined in this excerpt -- presumably the
# dataset shipped with ggplot2; confirm the package is attached.
str(diamonds)
## Classes 'tbl_df', 'tbl' and 'data.frame': 53940 obs. of 10 variables:
## $ carat : num 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
## $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
## $ depth : num 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
## $ table : num 55 61 65 58 58 57 57 55 61 61 ...
## $ price : int 326 326 327 334 335 336 336 337 337 338 ...
## $ x : num 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
## $ y : num 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
## $ z : num 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
# Build a 3 x 2 matrix. The six values fill it column by column, so the
# first three land in column 1 and the last three in column 2.
A <- matrix(c(6, 9, 12, 13, 21, 5), nrow = 3, ncol = 2)
A
## [,1] [,2]
## [1,] 6 13
## [2,] 9 21
## [3,] 12 5
class(A)
## [1] "matrix" "array"
# (The output above is for R >= 4.0.0; earlier versions print only "matrix".)
# A 2 x 3 x 2 array of zeros: the single 0 is recycled to fill all
# 2 * 3 * 2 = 12 cells
x <- array(0, dim = c(2, 3, 2))
# Printing shows one 2 x 3 slice per index of the third dimension
x
## , , 1
##
## [,1] [,2] [,3]
## [1,] 0 0 0
## [2,] 0 0 0
##
## , , 2
##
## [,1] [,2] [,3]
## [1,] 0 0 0
## [2,] 0 0 0
# str() summarises the type and the extent of each dimension
str(x)
## num [1:2, 1:3, 1:2] 0 0 0 0 0 0 0 0 0 0 ...
# dim() returns the extents themselves
dim(x)
## [1] 2 3 2
dplyr
dplyr
| function() | Action performed |
|---|---|
| filter() | Subsets observations based on their values |
| arrange() | Changes the order of observations based on their values |
| select() | Selects a subset of columns from the data frame |
| rename() | Changes the name of columns in the data frame |
| mutate() | Creates new columns (or variables) |
| group_by() | Changes the unit of analysis from the complete dataset to individual groups |
| summarize() | Collapses the data frame to a smaller number of rows which summarize the larger data |
# table4a is "wide": each year is its own column holding that year's cases
table4a
## # A tibble: 3 x 3
## country `1999` `2000`
## * <chr> <int> <int>
## 1 Afghanistan 745 2666
## 2 Brazil 37737 80488
## 3 China 212258 213766
table4a
## # A tibble: 3 x 3
## country `1999` `2000`
## * <chr> <int> <int>
## 1 Afghanistan 745 2666
## 2 Brazil 37737 80488
## 3 China 212258 213766
# Reshape to "long": the year column names move into a `year` column and
# their values into a `cases` column. pivot_longer() supersedes gather();
# it keeps each country's rows together rather than stacking the output
# one year column at a time.
table4a %>%
  pivot_longer(c(`1999`, `2000`), names_to = "year", values_to = "cases")
## # A tibble: 6 x 3
## country year cases
## <chr> <chr> <int>
## 1 Afghanistan 1999 745
## 2 Afghanistan 2000 2666
## 3 Brazil 1999 37737
## 4 Brazil 2000 80488
## 5 China 1999 212258
## 6 China 2000 213766
# table2 is "too long": each country-year observation is split across two
# rows, one for cases and one for population
table2
## # A tibble: 12 x 4
## country year type count
## <chr> <int> <chr> <int>
## 1 Afghanistan 1999 cases 745
## 2 Afghanistan 1999 population 19987071
## 3 Afghanistan 2000 cases 2666
## 4 Afghanistan 2000 population 20595360
## 5 Brazil 1999 cases 37737
## 6 Brazil 1999 population 172006362
## 7 Brazil 2000 cases 80488
## 8 Brazil 2000 population 174504898
## 9 China 1999 cases 212258
## 10 China 1999 population 1272915272
## 11 China 2000 cases 213766
## 12 China 2000 population 1280428583
table2
## # A tibble: 12 x 4
## country year type count
## <chr> <int> <chr> <int>
## 1 Afghanistan 1999 cases 745
## 2 Afghanistan 1999 population 19987071
## 3 Afghanistan 2000 cases 2666
## 4 Afghanistan 2000 population 20595360
## 5 Brazil 1999 cases 37737
## 6 Brazil 1999 population 172006362
## 7 Brazil 2000 cases 80488
## 8 Brazil 2000 population 174504898
## 9 China 1999 cases 212258
## 10 China 1999 population 1272915272
## 11 China 2000 cases 213766
## 12 China 2000 population 1280428583
# Reshape to one row per country-year: the values of `type` become column
# names and `count` supplies their values. pivot_wider() supersedes spread().
table2 %>%
  pivot_wider(names_from = type, values_from = count)
## # A tibble: 6 x 4
## country year cases population
## <chr> <int> <int> <int>
## 1 Afghanistan 1999 745 19987071
## 2 Afghanistan 2000 2666 20595360
## 3 Brazil 1999 37737 172006362
## 4 Brazil 2000 80488 174504898
## 5 China 1999 212258 1272915272
## 6 China 2000 213766 1280428583
# table3 packs two values into one character column: `rate` holds
# "cases/population"
table3
## # A tibble: 6 x 3
## country year rate
## * <chr> <int> <chr>
## 1 Afghanistan 1999 745/19987071
## 2 Afghanistan 2000 2666/20595360
## 3 Brazil 1999 37737/172006362
## 4 Brazil 2000 80488/174504898
## 5 China 1999 212258/1272915272
## 6 China 2000 213766/1280428583
table3
## # A tibble: 6 x 3
## country year rate
## * <chr> <int> <chr>
## 1 Afghanistan 1999 745/19987071
## 2 Afghanistan 2000 2666/20595360
## 3 Brazil 1999 37737/172006362
## 4 Brazil 2000 80488/174504898
## 5 China 1999 212258/1272915272
## 6 China 2000 213766/1280428583
# Split `rate` at the slash into two columns. Both new columns are still
# character, as the <chr> headers in the output show.
table3 %>%
  separate(col = rate, into = c("cases", "population"), sep = "/")
## # A tibble: 6 x 4
## country year cases population
## * <chr> <int> <chr> <chr>
## 1 Afghanistan 1999 745 19987071
## 2 Afghanistan 2000 2666 20595360
## 3 Brazil 1999 37737 172006362
## 4 Brazil 2000 80488 174504898
## 5 China 1999 212258 1272915272
## 6 China 2000 213766 1280428583
# table5 splits the year across two character columns, `century` and `year`
table5
## # A tibble: 6 x 4
## country century year rate
## * <chr> <chr> <chr> <chr>
## 1 Afghanistan 19 99 745/19987071
## 2 Afghanistan 20 00 2666/20595360
## 3 Brazil 19 99 37737/172006362
## 4 Brazil 20 00 80488/174504898
## 5 China 19 99 212258/1272915272
## 6 China 20 00 213766/1280428583
table5
## # A tibble: 6 x 4
## country century year rate
## * <chr> <chr> <chr> <chr>
## 1 Afghanistan 19 99 745/19987071
## 2 Afghanistan 20 00 2666/20595360
## 3 Brazil 19 99 37737/172006362
## 4 Brazil 20 00 80488/174504898
## 5 China 19 99 212258/1272915272
## 6 China 20 00 213766/1280428583
# Paste `century` and `year` into a single column `new`, joined by the
# default separator, an underscore
table5 %>%
  unite(col = new, century, year, sep = "_")
## # A tibble: 6 x 3
## country new rate
## <chr> <chr> <chr>
## 1 Afghanistan 19_99 745/19987071
## 2 Afghanistan 20_00 2666/20595360
## 3 Brazil 19_99 37737/172006362
## 4 Brazil 20_00 80488/174504898
## 5 China 19_99 212258/1272915272
## 6 China 20_00 213766/1280428583
# Same again with an empty separator, so the pieces join into a
# four-digit year with no underscore
table5 %>%
  unite(col = new, century, year, sep = "")
## # A tibble: 6 x 3
## country new rate
## <chr> <chr> <chr>
## 1 Afghanistan 1999 745/19987071
## 2 Afghanistan 2000 2666/20595360
## 3 Brazil 1999 37737/172006362
## 4 Brazil 2000 80488/174504898
## 5 China 1999 212258/1272915272
## 6 China 2000 213766/1280428583