Introduction to Functional Programming in R

Author

Geovanni Flores

Published

April 24, 2026

Libraries

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.6
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.1     ✔ tibble    3.3.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(purrr)
library(janitor)

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test

Question 1

# Question 1: report whether today's quantity clears the 20-unit threshold.
quantity <- 50

# `if` is an expression in R, so choose the message and print it in one call.
print(if (quantity > 20) "You sold a lot" else "Not enough for today.")
[1] "You sold a lot"

Question 2

# Question 2: classify the day's sales into one of three bands.
sales <- 25

# An `if` condition must be a single TRUE/FALSE, so combine the two bounds
# with the scalar, short-circuiting `&&` rather than the element-wise `&`.
if (sales < 20) {
  print("Not enough for today.")
} else if (sales >= 20 && sales <= 30) {
  print("Average Day.")
} else {
  print("You had a great day!")
}
[1] "Average Day."

Question 3

# Question 3: look up the tax rate for a product category and report the
# tax-inclusive price.
category <- "A"
price <- 100

# `switch()` dispatches on the category string. The unnamed last argument is
# the fallback: an unknown category now stops with a clear error instead of
# leaving `tax_rate` as NULL and printing a message with empty values.
tax_rate <- switch(category,
  A = 8,
  B = 10,
  C = 20,
  stop("Unknown category: ", category, call. = FALSE)
)

# The rate is a percentage, so convert it to a multiplier.
final_price <- price * (1 + tax_rate / 100)

print(
  paste(
    "A tax rate of", tax_rate,
    "% is applied. The total price is", final_price, "."
  )
)
[1] "A tax rate of 8 % is applied. The total price is 108 ."

Question 4

# Question 4: count the even numbers in a vector.
q4 <- c(2, 5, 3, 9, 8, 11, 6)

# `q4 %% 2 == 0` is a logical vector marking the even elements; summing it
# counts the TRUEs. This replaces a loop that updated a global with `<<-`.
count <- sum(q4 %% 2 == 0)

print(count)
[1] 3

Question 5

# Question 5: print each value that is both greater than 4 and even; print a
# fixed message for every other value.
q5 <- 1:8

# The loop runs inside an anonymous function so that `i` stays local.
# `i` is a single number, so the two tests are joined with the scalar `&&`
# (short-circuiting) rather than the element-wise `&`.
q5 |> (\(x) for (i in x) {
  if (i > 4 && i %% 2 == 0) {
    print(i)
  } else {
    print("Condition not satisfied")
  }
})()
[1] "Condition not satisfied"
[1] "Condition not satisfied"
[1] "Condition not satisfied"
[1] "Condition not satisfied"
[1] "Condition not satisfied"
[1] 6
[1] "Condition not satisfied"
[1] 8

Question 6

# Raise `x` to the power `y` and print a sentence describing the result.
#
# x: the base.
# y: the exponent.
# Returns the printed sentence invisibly (the value of `print()`).
pow <- function(x, y) {
  powered <- x^y
  sentence <- paste(x, "raised to the power of", y, "is", powered, ".")
  print(sentence)
}

pow(8, 2)
[1] "8 raised to the power of 2 is 64 ."

Question 7

Q7.1

# Q7.1: add the per-row mean and per-row sum of the x, y and z columns.
df <- data.frame(x = 1:4, y = 5:8, z = 10:13)

# Both summaries are taken from the original x, y, z columns only. Computing
# `row_sum` over the whole data frame after `row_mean` had been appended
# folded the mean into the sum (21.33 instead of 16 for the first row).
# `rowMeans()` / `rowSums()` are the vectorised forms of `apply(., 1, f)`.
df$row_mean <- rowMeans(df[c("x", "y", "z")])
df$row_sum  <- rowSums(df[c("x", "y", "z")])

df |> print()
  x y  z row_mean  row_sum
1 1 5 10 5.333333 21.33333
2 2 6 11 6.333333 25.33333
3 3 7 12 7.333333 29.33333
4 4 8 13 8.333333 33.33333

Q7.2

# Q7.2: square roots of three numbers, base-R version.
q8 <- c(12, 18, 6)

# `sapply()` applies `sqrt` to each element and simplifies to a numeric vector.
print(sapply(q8, sqrt))
[1] 3.464102 4.242641 2.449490
# purrr version of the same calculation: `map_dbl()` returns a double vector.
print(map_dbl(q8, sqrt))
[1] 3.464102 4.242641 2.449490

Q7.3

# Q7.3: length of every element of a named list, base-R version.
q9 <- list(A = 1:5, B = 6:20, C = 1)

# The list names carry over to the simplified result.
print(sapply(q9, length))
 A  B  C 
 5 15  1 
# purrr version of the same calculation: `map_int()` returns an integer vector.
print(map_int(q9, length))
 A  B  C 
 5 15  1 

Q7.4

Note

Answer: lapply() always returns a list, while sapply() tries to simplify the result into a vector or matrix when possible (and silently falls back to a list when it cannot). map() from purrr is the counterpart of lapply(): it always returns a list. map_dbl(), map_int(), etc. resemble sapply() in that they return an atomic vector, but they are type-stable: they always return a vector of the stated type and raise an error if the results do not fit it. To calculate the exponentials of three numbers, sapply() or map_dbl() should be used because they return a plain numeric vector, which is easier to read and work with than a list.

Question 8

Q8.1

# Make the black-and-white ggplot2 theme the default for the plots below.
theme_set(theme_bw())

# Read the TidyTuesday (2019-05-07) student-teacher ratio CSV from GitHub.
student_ratio <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-07/student_teacher_ratio.csv"
)
Rows: 5189 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (6): edulit_ind, indicator, country_code, country, flag_codes, flags
dbl (2): year, student_ratio

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Q8.2

# Number of rows recorded for each year.
print(count(student_ratio, year))
# A tibble: 7 × 2
   year     n
  <dbl> <int>
1  2012   887
2  2013   880
3  2014   913
4  2015   921
5  2016   917
6  2017   635
7  2018    36
# Number of rows recorded for each education-level indicator.
print(count(student_ratio, indicator))
# A tibble: 7 × 2
  indicator                                 n
  <chr>                                 <int>
1 Lower Secondary Education               739
2 Post-Secondary Non-Tertiary Education   494
3 Pre-Primary Education                   837
4 Primary Education                      1029
5 Secondary Education                     840
6 Tertiary Education                      550
7 Upper Secondary Education               700
# Number of rows recorded for each country (or region) name.
print(count(student_ratio, country))
# A tibble: 232 × 2
   country                  n
   <chr>                <int>
 1 Afghanistan             13
 2 Africa (Northern)       30
 3 Africa (Sub-Saharan)    28
 4 Albania                 36
 5 Algeria                 18
 6 Andorra                 29
 7 Angola                   7
 8 Antigua and Barbuda     13
 9 Arab States             30
10 Argentina                5
# ℹ 222 more rows

Q8.3

# Q8.3: the 10 lowest and 10 highest primary-education student-teacher
# ratios in 2012, one averaged value per country.
top_bottom_2012 <- student_ratio |>
  filter(
    year == 2012,
    indicator == "Primary Education",
    !is.na(student_ratio)
  ) |>
  group_by(country) |>
  summarise(ratio = mean(student_ratio, na.rm = TRUE)) |>
  arrange(ratio) |>
  # After sorting, the first 10 rows are the lowest ratios and the last 10
  # the highest.
  (\(ranked) bind_rows(slice_head(ranked, n = 10), slice_tail(ranked, n = 10)))() |>
  # Order the factor levels by ratio so the bars are drawn sorted.
  mutate(country = reorder(country, ratio))

ggplot(top_bottom_2012, aes(x = ratio, y = country, fill = ratio)) +
  geom_col() +
  labs(
    title = "Top and Bottom 10 Countries by Student-Teacher Ratio",
    subtitle = "Year: 2012 | Primary Education",
    x = "Student-Teacher Ratio",
    y = "Country"
  ) +
  theme(legend.position = "none")

Q8.4

# Build the "top and bottom 10" bar chart of primary-education
# student-teacher ratios for one year.
#
# yr: the year to keep; compared against the `year` column of the global
#   `student_ratio` data frame.
# Returns the ggplot object. It is drawn when auto-printed at top level or
# passed to `print()`.
plot_ratio_by_year <- function(yr) {
  top_bottom <- student_ratio |>
    filter(year == yr, indicator == "Primary Education") |>
    filter(!is.na(student_ratio)) |>
    group_by(country) |>
    summarise(ratio = mean(student_ratio, na.rm = TRUE)) |>
    arrange(ratio) |>
    # After sorting, the first 10 rows are the lowest ratios and the last 10
    # the highest.
    (\(x) bind_rows(head(x, 10), tail(x, 10)))() |>
    mutate(country = reorder(country, ratio))

  # The chain deliberately does not end in `|> print()`. `|>` binds tighter
  # than `+`, so `theme(...) |> print()` printed only the theme object
  # (the "<theme> List of 1" dump) and never drew the plot.
  ggplot(top_bottom, aes(x = ratio, y = country, fill = ratio)) +
    geom_col() +
    labs(
      title = "Top and Bottom 10 Countries by Student-Teacher Ratio",
      subtitle = paste("Year:", yr, "| Primary Education"),
      x = "Student-Teacher Ratio",
      y = "Country"
    ) +
    theme(legend.position = "none")
}

Q8.5

# Build one plot per year with `map()` (always a list, unlike `sapply()`),
# then draw each one explicitly. `walk()` returns its input invisibly, so
# nothing is auto-printed and no `[[1]]`-style list indices appear.
2012:2017 |>
  map(plot_ratio_by_year) |>
  walk(print)
<theme> List of 1
 $ legend.position: chr "none"
 @ complete: logi FALSE
 @ validate: logi TRUE
<theme> List of 1
 $ legend.position: chr "none"
 @ complete: logi FALSE
 @ validate: logi TRUE
<theme> List of 1
 $ legend.position: chr "none"
 @ complete: logi FALSE
 @ validate: logi TRUE
<theme> List of 1
 $ legend.position: chr "none"
 @ complete: logi FALSE
 @ validate: logi TRUE
<theme> List of 1
 $ legend.position: chr "none"
 @ complete: logi FALSE
 @ validate: logi TRUE
<theme> List of 1
 $ legend.position: chr "none"
 @ complete: logi FALSE
 @ validate: logi TRUE
[[1]]


[[2]]


[[3]]


[[4]]


[[5]]


[[6]]

Question 9

Q9.0

# TidyTuesday (2020-07-21) animal outcomes data, read from GitHub.
animal_outcomes <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-21/animal_outcomes.csv"
)
Rows: 664 Columns: 12
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (2): animal_type, outcome
dbl (10): year, ACT, NSW, NT, QLD, SA, TAS, VIC, WA, Total

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# TidyTuesday (2020-07-21) animal complaints data, read from GitHub.
animal_complaints <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-21/animal_complaints.csv"
)
Rows: 42413 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): Animal Type, Complaint Type, Date Received, Suburb, Electoral Division

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# TidyTuesday (2020-07-21) Brisbane complaints data, read from GitHub.
brisbane_complaints <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-21/brisbane_complaints.csv"
)
Rows: 31330 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (7): nature, animal_type, category, suburb, date_range, responsible_offi...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Q9.1

# Column names, types and first values of the outcomes data.
glimpse(animal_outcomes)
Rows: 664
Columns: 12
$ year        <dbl> 1999, 1999, 1999, 1999, 1999, 1999, 1999, 1999, 1999, 1999…
$ animal_type <chr> "Dogs", "Dogs", "Dogs", "Dogs", "Cats", "Cats", "Cats", "C…
$ outcome     <chr> "Reclaimed", "Rehomed", "Other", "Euthanized", "Reclaimed"…
$ ACT         <dbl> 610, 1245, 12, 360, 111, 1442, 0, 1007, 0, 1, 0, 0, 2, 90,…
$ NSW         <dbl> 3140, 7525, 745, 9221, 201, 3913, 447, 8205, 0, 12, 0, 8, …
$ NT          <dbl> 205, 526, 955, 9, 22, 269, 0, 847, 1, 3, 0, 0, 0, 120, 0, …
$ QLD         <dbl> 1392, 5489, 860, 9214, 206, 3901, 386, 10554, 0, 3, 11, 1,…
$ SA          <dbl> 2329, 1105, 380, 1701, 157, 1055, 46, 3415, 2, 10, 1, 0, 1…
$ TAS         <dbl> 516, 480, 168, 599, 31, 752, 124, 1056, 1, 0, 2, 0, 1, 25,…
$ VIC         <dbl> 7130, 4908, 1001, 5217, 884, 3768, 1501, 6113, 87, 19, 0, …
$ WA          <dbl> 1, 137, 6, 18, 0, 62, 5, 5, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, …
$ Total       <dbl> 15323, 21415, 4127, 26339, 1612, 15162, 2509, 31202, 91, 4…
# Column names, types and first values of the complaints data.
glimpse(animal_complaints)
Rows: 42,413
Columns: 5
$ `Animal Type`        <chr> "dog", "dog", "dog", "dog", "dog", "dog", "dog", …
$ `Complaint Type`     <chr> "Aggressive Animal", "Noise", "Noise", "Private I…
$ `Date Received`      <chr> "June 2020", "June 2020", "June 2020", "June 2020…
$ Suburb               <chr> "Alice River", "Alice River", "Alice River", "Ali…
$ `Electoral Division` <chr> "Division 1", "Division 1", "Division 1", "Divisi…
# Column names, types and first values of the Brisbane complaints data.
glimpse(brisbane_complaints)
Rows: 31,330
Columns: 7
$ nature             <chr> "Animal", "Animal", "Animal", "Animal", "Animal", "…
$ animal_type        <chr> "Dog", "Dog", "Dog", "Dog", "Attack", "Attack", "Do…
$ category           <chr> "Fencing Issues", "Fencing Issues", "Defecating In …
$ suburb             <chr> "SUNNYBANK", "SUNNYBANK HILLS", "SUNNYBANK", "SUNNY…
$ date_range         <chr> "1st-quarter-2016-17.csv", "1st-quarter-2016-17.csv…
$ responsible_office <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ city               <chr> "Brisbane", "Brisbane", "Brisbane", "Brisbane", "Br…

Q9.2

# Standardise a data frame's column names with janitor's `clean_names()`.
#
# df: the data frame whose column names should be cleaned.
# Returns whatever `clean_names()` returns for `df`.
clean_names_fn <- function(df) {
  clean_names(df)
}

# Overwrite each dataset with its cleaned-name version.
animal_outcomes <- clean_names_fn(animal_outcomes)
animal_complaints <- clean_names_fn(animal_complaints)
brisbane_complaints <- clean_names_fn(brisbane_complaints)
# Check the cleaned column names of the outcomes data.
names(animal_outcomes)
 [1] "year"        "animal_type" "outcome"     "act"         "nsw"        
 [6] "nt"          "qld"         "sa"          "tas"         "vic"        
[11] "wa"          "total"      
# Check the cleaned column names of the complaints data.
names(animal_complaints)
[1] "animal_type"        "complaint_type"     "date_received"     
[4] "suburb"             "electoral_division"
# Check the cleaned column names of the Brisbane complaints data.
names(brisbane_complaints)
[1] "nature"             "animal_type"        "category"          
[4] "suburb"             "date_range"         "responsible_office"
[7] "city"              

Q9.3

# Horizontal bar chart of how often each value of one column occurs.
#
# df:  a data frame.
# var: unquoted name of the column to tally (tidy evaluation via `{{ }}`).
# Returns a ggplot object; more layers or labels can be added with `+`.
plot_bar <- function(df, var) {
  # Capture the column name as text for the axis label.
  axis_label <- deparse(substitute(var))

  tallies <- count(df, {{ var }})

  # Bars are ordered by count; `coord_flip()` then lays them out horizontally.
  ggplot(tallies, aes(x = reorder({{ var }}, n), y = n, fill = {{ var }})) +
    geom_col() +
    coord_flip() +
    labs(x = axis_label, y = "Count") +
    theme(legend.position = "none")
}

Q9.4

# Q9.4: one bar chart per categorical column of interest in each dataset.
# Each call adds its own title and subtitle to the plot from `plot_bar()`.
plot_bar(animal_outcomes, animal_type) +
  labs(title = "Count by Animal Type", subtitle = "animal_outcomes dataset")

plot_bar(animal_outcomes, outcome) +
  labs(title = "Count by Outcome", subtitle = "animal_outcomes dataset")

plot_bar(animal_complaints, animal_type) +
  labs(title = "Count by Animal Type", subtitle = "animal_complaints dataset")

plot_bar(animal_complaints, complaint_type) +
  labs(title = "Count by Complaint Type", subtitle = "animal_complaints dataset")

plot_bar(brisbane_complaints, animal_type) +
  labs(title = "Count by Animal Type", subtitle = "brisbane_complaints dataset")

plot_bar(brisbane_complaints, category) +
  labs(title = "Count by Category", subtitle = "brisbane_complaints dataset")

Question 10

# Average Ozone per Month, computed by nesting the data by month and mapping
# a helper over the nested data frames.
#
# data: a data frame with `Month` and `Ozone` columns.
# Prints a tibble with columns `Month` and `avg_ozone` (still grouped by
# `Month`) and returns it invisibly, as `print()` does.
avg_ozone_by_month <- function(data) {
  # One-row summary holding the mean of the non-missing Ozone values.
  calc_avg_ozone <- function(df) {
    observed <- filter(df, !is.na(Ozone))
    summarise(observed, avg_ozone = mean(Ozone))
  }

  # `nest()` leaves one row per month, with that month's rows stored in a
  # list-column named `data`.
  nested <- nest(group_by(data, Month))

  # Inside `mutate()`, `data` refers to that list-column, not to the function
  # argument of the same name.
  monthly <- mutate(
    nested,
    avg_ozone = map_dbl(
      data,
      \(month_df) pull(calc_avg_ozone(month_df), avg_ozone)
    )
  )

  print(select(monthly, Month, avg_ozone))
}

avg_ozone_by_month(airquality)
# A tibble: 5 × 2
# Groups:   Month [5]
  Month avg_ozone
  <int>     <dbl>
1     5      23.6
2     6      29.4
3     7      59.1
4     8      60.0
5     9      31.4