--- title: "Data Wrangling" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Data Wrangling} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r knitr-mechanics} #| include: false knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ``` ```{r pre-setup} #| echo: false #| message: false # devtools::install_github("njlyon0/supportR", force = TRUE) ``` ## Overview The `supportR` package is an amalgam of distinct functions I've written to accomplish small data wrangling, quality control, or visualization tasks. These functions tend to be short and narrowly-defined. An additional consequence of the motivation for creating them is that they tend to not be inter-related or united by a common theme. To help account for that, I've divided this vignette into groups of functions that do have similar purposes. If any of these functions don't work for you, please [open an issue](https://github.com/njlyon0/supportR/issues) so that I can repair the issue as soon as possible. ```{r setup} # install.packages("supportR") library(supportR) ``` ### Example Data In order to demonstrate some of the data wrangling functions of `supportR`, we'll use some some example data from Dr. [Allison Horst](https://allisonhorst.com/allison-horst)'s [`palmerpenguins` R package](https://github.com/allisonhorst/palmerpenguins). ```{r penguin-data-get} #| include: false penguins <- structure(list(species = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("Adelie", "Chinstrap", "Gentoo"), class = "factor"), island = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("Biscoe", "Dream", "Torgersen" ), class = "factor"), bill_length_mm = c(39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34.1, 42, 37.8, 37.8, 41.1, 38.6, 34.6, 36.6, 38.7, 42.5, 34.4, 46, 37.8, 37.7, 35.9, 38.2, 38.8, 35.3, 40.6, 40.5, 37.9, 40.5, 39.5, 37.2, 39.5, 40.9, 36.4, 39.2, 38.8, 42.2, 37.6, 39.8, 36.5, 40.8, 36, 44.1, 37, 39.6, 41.1, 37.5, 36, 42.3, 39.6, 40.1, 35, 42, 34.5, 41.4, 39, 40.6, 36.5, 37.6, 35.7, 41.3, 37.6, 41.1, 36.4, 41.6, 35.5, 41.1, 35.9, 41.8, 33.5, 39.7, 39.6, 45.8, 35.5, 42.8, 40.9, 37.2, 36.2, 42.1, 34.6, 42.9, 36.7, 35.1, 37.3, 41.3, 36.3, 36.9, 38.3, 38.9, 35.7, 41.1, 34, 39.6, 36.2, 40.8, 38.1, 40.3, 33.1, 43.2, 35, 41, 37.7, 37.8, 37.9, 39.7, 38.6, 38.2, 38.1, 43.2, 38.1, 45.6, 39.7, 42.2, 39.6, 42.7, 38.6, 37.3, 35.7, 41.1, 36.2, 37.7, 40.2, 41.4, 35.2, 40.6, 38.8, 41.5, 39, 44.1, 38.5, 43.1, 36.8, 37.5, 38.1, 41.1, 35.6, 40.2, 37, 39.7, 40.2, 40.6, 32.1, 40.7, 37.3, 39, 39.2, 36.6, 36, 37.8, 36, 41.5, 46.1, 50, 48.7, 50, 47.6, 46.5, 45.4, 46.7, 43.3, 46.8, 40.9, 49, 45.5, 48.4, 45.8, 49.3, 42, 49.2, 46.2, 48.7, 50.2, 45.1, 46.5, 46.3, 42.9, 46.1, 44.5, 47.8, 48.2, 50, 47.3, 42.8, 45.1, 59.6, 49.1, 48.4, 42.6, 44.4, 44, 48.7, 42.7, 49.6, 45.3, 49.6, 50.5, 43.6, 45.5, 50.5, 44.9, 45.2, 46.6, 48.5, 45.1, 50.1, 46.5, 45, 43.8, 45.5, 43.2, 50.4, 45.3, 46.2, 45.7, 54.3, 45.8, 49.8, 46.2, 49.5, 43.5, 50.7, 47.7, 46.4, 48.2, 46.5, 46.4, 48.6, 47.5, 51.1, 45.2, 45.2, 49.1, 52.5, 47.4, 50, 44.9, 50.8, 43.4, 51.3, 47.5, 52.1, 47.5, 52.2, 45.5, 49.5, 44.5, 50.8, 49.4, 46.9, 48.4, 51.1, 48.5, 55.9, 47.2, 49.1, 47.3, 46.8, 41.7, 53.4, 43.3, 48.1, 50.5, 49.8, 43.5, 51.5, 46.2, 55.1, 44.5, 48.8, 47.2, NA, 46.8, 50.4, 45.2, 49.9, 46.5, 50, 51.3, 45.4, 52.7, 45.2, 46.1, 51.3, 46, 51.3, 46.6, 51.7, 47, 52, 45.9, 50.5, 50.3, 58, 46.4, 49.2, 42.4, 48.5, 43.2, 50.6, 46.7, 52, 50.5, 49.5, 46.4, 52.8, 40.9, 54.2, 42.5, 51, 49.7, 47.5, 47.6, 52, 46.9, 53.5, 49, 46.2, 50.9, 45.5, 50.9, 50.8, 50.1, 49, 51.5, 49.8, 48.1, 51.4, 45.7, 50.7, 42.5, 52.2, 45.2, 49.3, 50.2, 45.6, 51.9, 46.8, 45.7, 55.8, 43.5, 49.6, 50.8, 50.2), bill_depth_mm = c(18.7, 17.4, 18, NA, 19.3, 20.6, 17.8, 19.6, 18.1, 20.2, 17.1, 17.3, 17.6, 21.2, 21.1, 17.8, 19, 20.7, 18.4, 21.5, 18.3, 18.7, 19.2, 18.1, 17.2, 18.9, 18.6, 17.9, 18.6, 18.9, 16.7, 18.1, 17.8, 18.9, 17, 21.1, 20, 18.5, 19.3, 19.1, 18, 18.4, 18.5, 19.7, 16.9, 18.8, 19, 18.9, 17.9, 21.2, 17.7, 18.9, 17.9, 19.5, 18.1, 18.6, 17.5, 18.8, 16.6, 19.1, 16.9, 21.1, 17, 18.2, 17.1, 18, 16.2, 19.1, 16.6, 19.4, 19, 18.4, 17.2, 18.9, 17.5, 18.5, 16.8, 19.4, 16.1, 19.1, 17.2, 17.6, 18.8, 19.4, 17.8, 20.3, 19.5, 18.6, 19.2, 18.8, 18, 18.1, 17.1, 18.1, 17.3, 18.9, 18.6, 18.5, 16.1, 18.5, 17.9, 20, 16, 20, 18.6, 18.9, 17.2, 20, 17, 19, 16.5, 20.3, 17.7, 19.5, 20.7, 18.3, 17, 20.5, 17, 18.6, 17.2, 19.8, 17, 18.5, 15.9, 19, 17.6, 18.3, 17.1, 18, 17.9, 19.2, 18.5, 18.5, 17.6, 17.5, 17.5, 20.1, 16.5, 17.9, 17.1, 17.2, 15.5, 17, 16.8, 18.7, 18.6, 18.4, 17.8, 18.1, 17.1, 18.5, 13.2, 16.3, 14.1, 15.2, 14.5, 13.5, 14.6, 15.3, 13.4, 15.4, 13.7, 16.1, 13.7, 14.6, 14.6, 15.7, 13.5, 15.2, 14.5, 15.1, 14.3, 14.5, 14.5, 15.8, 13.1, 15.1, 14.3, 15, 14.3, 15.3, 15.3, 14.2, 14.5, 17, 14.8, 16.3, 13.7, 17.3, 13.6, 15.7, 13.7, 16, 13.7, 15, 15.9, 13.9, 13.9, 15.9, 13.3, 15.8, 14.2, 14.1, 14.4, 15, 14.4, 15.4, 13.9, 15, 14.5, 15.3, 13.8, 14.9, 13.9, 15.7, 14.2, 16.8, 14.4, 16.2, 14.2, 15, 15, 15.6, 15.6, 14.8, 15, 16, 14.2, 16.3, 13.8, 16.4, 14.5, 15.6, 14.6, 15.9, 13.8, 17.3, 14.4, 14.2, 14, 17, 15, 17.1, 14.5, 16.1, 14.7, 15.7, 15.8, 14.6, 14.4, 16.5, 15, 17, 15.5, 15, 13.8, 16.1, 14.7, 15.8, 14, 15.1, 15.2, 15.9, 15.2, 16.3, 14.1, 16, 15.7, 16.2, 13.7, NA, 14.3, 15.7, 14.8, 16.1, 17.9, 19.5, 19.2, 18.7, 19.8, 17.8, 18.2, 18.2, 18.9, 19.9, 17.8, 20.3, 17.3, 18.1, 17.1, 19.6, 20, 17.8, 18.6, 18.2, 17.3, 17.5, 16.6, 19.4, 17.9, 19, 18.4, 19, 17.8, 20, 16.6, 20.8, 16.7, 18.8, 18.6, 16.8, 18.3, 20.7, 16.6, 19.9, 19.5, 17.5, 19.1, 17, 17.9, 18.5, 17.9, 19.6, 18.7, 17.3, 16.4, 19, 17.3, 19.7, 17.3, 18.8, 16.6, 19.9, 18.8, 19.4, 19.5, 16.5, 17, 19.8, 18.1, 18.2, 19, 18.7), flipper_length_mm = c(181L, 186L, 195L, NA, 193L, 190L, 181L, 195L, 193L, 190L, 186L, 180L, 182L, 191L, 198L, 185L, 195L, 197L, 184L, 194L, 174L, 180L, 189L, 185L, 180L, 187L, 183L, 187L, 172L, 180L, 178L, 178L, 188L, 184L, 195L, 196L, 190L, 180L, 181L, 184L, 182L, 195L, 186L, 196L, 185L, 190L, 182L, 179L, 190L, 191L, 186L, 188L, 190L, 200L, 187L, 191L, 186L, 193L, 181L, 194L, 185L, 195L, 185L, 192L, 184L, 192L, 195L, 188L, 190L, 198L, 190L, 190L, 196L, 197L, 190L, 195L, 191L, 184L, 187L, 195L, 189L, 196L, 187L, 193L, 191L, 194L, 190L, 189L, 189L, 190L, 202L, 205L, 185L, 186L, 187L, 208L, 190L, 196L, 178L, 192L, 192L, 203L, 183L, 190L, 193L, 184L, 199L, 190L, 181L, 197L, 198L, 191L, 193L, 197L, 191L, 196L, 188L, 199L, 189L, 189L, 187L, 198L, 176L, 202L, 186L, 199L, 191L, 195L, 191L, 210L, 190L, 197L, 193L, 199L, 187L, 190L, 191L, 200L, 185L, 193L, 193L, 187L, 188L, 190L, 192L, 185L, 190L, 184L, 195L, 193L, 187L, 201L, 211L, 230L, 210L, 218L, 215L, 210L, 211L, 219L, 209L, 215L, 214L, 216L, 214L, 213L, 210L, 217L, 210L, 221L, 209L, 222L, 218L, 215L, 213L, 215L, 215L, 215L, 216L, 215L, 210L, 220L, 222L, 209L, 207L, 230L, 220L, 220L, 213L, 219L, 208L, 208L, 208L, 225L, 210L, 216L, 222L, 217L, 210L, 225L, 213L, 215L, 210L, 220L, 210L, 225L, 217L, 220L, 208L, 220L, 208L, 224L, 208L, 221L, 214L, 231L, 219L, 230L, 214L, 229L, 220L, 223L, 216L, 221L, 221L, 217L, 216L, 230L, 209L, 220L, 215L, 223L, 212L, 221L, 212L, 224L, 212L, 228L, 218L, 218L, 212L, 230L, 218L, 228L, 212L, 224L, 214L, 226L, 216L, 222L, 203L, 225L, 219L, 228L, 215L, 228L, 216L, 215L, 210L, 219L, 208L, 209L, 216L, 229L, 213L, 230L, 217L, 230L, 217L, 222L, 214L, NA, 215L, 222L, 212L, 213L, 192L, 196L, 193L, 188L, 197L, 198L, 178L, 197L, 195L, 198L, 193L, 194L, 185L, 201L, 190L, 201L, 197L, 181L, 190L, 195L, 181L, 191L, 187L, 193L, 195L, 197L, 200L, 200L, 191L, 205L, 187L, 201L, 187L, 203L, 195L, 199L, 195L, 210L, 192L, 205L, 210L, 187L, 196L, 196L, 196L, 201L, 190L, 212L, 187L, 198L, 199L, 201L, 193L, 203L, 187L, 197L, 191L, 203L, 202L, 194L, 206L, 189L, 195L, 207L, 202L, 193L, 210L, 198L), body_mass_g = c(3750L, 3800L, 3250L, NA, 3450L, 3650L, 3625L, 4675L, 3475L, 4250L, 3300L, 3700L, 3200L, 3800L, 4400L, 3700L, 3450L, 4500L, 3325L, 4200L, 3400L, 3600L, 3800L, 3950L, 3800L, 3800L, 3550L, 3200L, 3150L, 3950L, 3250L, 3900L, 3300L, 3900L, 3325L, 4150L, 3950L, 3550L, 3300L, 4650L, 3150L, 3900L, 3100L, 4400L, 3000L, 4600L, 3425L, 2975L, 3450L, 4150L, 3500L, 4300L, 3450L, 4050L, 2900L, 3700L, 3550L, 3800L, 2850L, 3750L, 3150L, 4400L, 3600L, 4050L, 2850L, 3950L, 3350L, 4100L, 3050L, 4450L, 3600L, 3900L, 3550L, 4150L, 3700L, 4250L, 3700L, 3900L, 3550L, 4000L, 3200L, 4700L, 3800L, 4200L, 3350L, 3550L, 3800L, 3500L, 3950L, 3600L, 3550L, 4300L, 3400L, 4450L, 3300L, 4300L, 3700L, 4350L, 2900L, 4100L, 3725L, 4725L, 3075L, 4250L, 2925L, 3550L, 3750L, 3900L, 3175L, 4775L, 3825L, 4600L, 3200L, 4275L, 3900L, 4075L, 2900L, 3775L, 3350L, 3325L, 3150L, 3500L, 3450L, 3875L, 3050L, 4000L, 3275L, 4300L, 3050L, 4000L, 3325L, 3500L, 3500L, 4475L, 3425L, 3900L, 3175L, 3975L, 3400L, 4250L, 3400L, 3475L, 3050L, 3725L, 3000L, 3650L, 4250L, 3475L, 3450L, 3750L, 3700L, 4000L, 4500L, 5700L, 4450L, 5700L, 5400L, 4550L, 4800L, 5200L, 4400L, 5150L, 4650L, 5550L, 4650L, 5850L, 4200L, 5850L, 4150L, 6300L, 4800L, 5350L, 5700L, 5000L, 4400L, 5050L, 5000L, 5100L, 4100L, 5650L, 4600L, 5550L, 5250L, 4700L, 5050L, 6050L, 5150L, 5400L, 4950L, 5250L, 4350L, 5350L, 3950L, 5700L, 4300L, 4750L, 5550L, 4900L, 4200L, 5400L, 5100L, 5300L, 4850L, 5300L, 4400L, 5000L, 4900L, 5050L, 4300L, 5000L, 4450L, 5550L, 4200L, 5300L, 4400L, 5650L, 4700L, 5700L, 4650L, 5800L, 4700L, 5550L, 4750L, 5000L, 5100L, 5200L, 4700L, 5800L, 4600L, 6000L, 4750L, 5950L, 4625L, 5450L, 4725L, 5350L, 4750L, 5600L, 4600L, 5300L, 4875L, 5550L, 4950L, 5400L, 4750L, 5650L, 4850L, 5200L, 4925L, 4875L, 4625L, 5250L, 4850L, 5600L, 4975L, 5500L, 4725L, 5500L, 4700L, 5500L, 4575L, 5500L, 5000L, 5950L, 4650L, 5500L, 4375L, 5850L, 4875L, 6000L, 4925L, NA, 4850L, 5750L, 5200L, 5400L, 3500L, 3900L, 3650L, 3525L, 3725L, 3950L, 3250L, 3750L, 4150L, 3700L, 3800L, 3775L, 3700L, 4050L, 3575L, 4050L, 3300L, 3700L, 3450L, 4400L, 3600L, 3400L, 2900L, 3800L, 3300L, 4150L, 3400L, 3800L, 3700L, 4550L, 3200L, 4300L, 3350L, 4100L, 3600L, 3900L, 3850L, 4800L, 2700L, 4500L, 3950L, 3650L, 3550L, 3500L, 3675L, 4450L, 3400L, 4300L, 3250L, 3675L, 3325L, 3950L, 3600L, 4050L, 3350L, 3450L, 3250L, 4050L, 3800L, 3525L, 3950L, 3650L, 3650L, 4000L, 3400L, 3775L, 4100L, 3775L), sex = structure(c(2L, 1L, 1L, NA, 1L, 2L, 1L, 2L, NA, NA, NA, NA, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, NA, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, NA, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, NA, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, NA, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, NA, 2L, 1L, NA, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L), levels = c("female", "male"), class = "factor"), year = c(2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L)), row.names = c(NA, -344L), class = "data.frame") ``` ```{r penguin-data-str} # Check the structure of the penguins dataset str(penguins) ``` ### Summarize Within Groups With that data loaded, we can use the `summary_table` function to quickly get group-wise summaries and retrieve generally useful summary statistics. The `groups` argument supports a vector of all of the column names to group by while `response` must be a single numeric column. The `drop_na` argument allows group combinations that result in an NA to be automatically dropped (i.e., if a penguin didn't have an island listed that would be dropped). The mean, standard deviation (SD), sample size, and standard error (SE) are all returned to facilitate easy figure creation. There is also a `round_digits` argument that lets you specify how many digits you'd like to retain for the mean, SD, and SE. ```{r summary_table} # Summarize the data supportR::summary_table(data = penguins, groups = c("species", "island"), response = "bill_length_mm", drop_na = TRUE) ``` ### Safely Rename Columns The `safe_rename` function allows--perhaps predictably--"safe" renaming of column names in a given data object. The 'bad' column names and corresponding 'good' names must be specified. The order of entries in the two vectors must match (i.e., the first bad name will be replaced with the first good name), but that order need not match the order in which they occur in the data! ```{r safe-rename} # Make a dataframe to demonstrate df <- data.frame("first" = 1:3, "middle" = 4:6, "second" = 7:9) # Invoke the function supportR::safe_rename(data = df, bad_names = c("second", "middle"), good_names = c("third", "second")) ``` ### Handle Special Characters When loading text data into R you may have encountered pernicious 'special' characters that can affect the meaning of character data. These characters are often non-ASCII (see [here](https://en.wikipedia.org/wiki/ASCII) for more context on "ASCII" characters) and `supportR` offers the `replace_non_ascii` function to find and replace these characters with visually similar equivalents in a given vector. Note that letters with accents above or below them (e.g., umlaut, accent circumflex, etc.) are technically non-ASCII characters. Because they are often a part of proper nouns rather than being categorically incorrect I've split off whether they should be replaced into an `include_letters` argument that defaults to `FALSE`. Also, non-ASCII characters trigger errors in the R package submission process so the example below uses "u-escape" codes for ASCII characters to still demonstrate the function without causing errors when I submit version updates of `supportR` to CRAN. ```{r replace-non-ascii-prep} # Make a vector of non-ASCII characters (non_ascii <- c("\u201C", "\u00AC", "\u00D7")) ``` ```{r replace-non-ascii} # Replace them with ASCII equivalents supportR::replace_non_ascii(x = non_ascii, include_letters = F) ``` ### Crop Triangles `crop_tri` allows dropping one "triangle" of a symmetric dataframe / matrix. It also includes a `drop_diag` argument that accepts a logical for whether to drop the diagonal of the data object. This is primarily useful (I find) in allowing piping through this function as opposed to using the base R notation for removing a triangle of a symmetric data object. ```{r crop_tri} # Define a simple matrix wtih symmetric dimensions mat <- matrix(data = c(1:2, 2:1), nrow = 2, ncol = 2) # Crop off it's lower triangle supportR::crop_tri(data = mat, drop_tri = "lower", drop_diag = FALSE) # Drop the diagonal as well supportR::crop_tri(data = mat, drop_tri = "lower", drop_diag = TRUE) ``` ### "Melt" Arrays into Dataframes `array_melt` allows users to 'melt' an array of dimensions X, Y, and Z into a dataframe containing columns "x", "y", "z", and "value" where "value" is whatever was stored at those coordinates in the array. ```{r array_melt} # Make data to fill the array vec1 <- c(5, 9, 3) vec2 <- c(10:15) # Create dimension names (x = col, y = row, z = which matrix) x_vals <- c("Col_1","Col_2","Col_3") y_vals <- c("Row_1","Row_2","Row_3") z_vals <- c("Mat_1","Mat_2") # Make an array from these components g <- array(data = c(vec1, vec2), dim = c(3, 3, 2), dimnames = list(x_vals, y_vals, z_vals)) # "Melt" the array into a dataframe melted <- supportR::array_melt(array = g) # Look at that top of that head(melted) ```