library(tidyverse)
# Import the Brodhead Center menu data from CSV (read_csv() comes from {readr},
# which is attached by the tidyverse) and store it as `brodhead_center`.
brodhead_center <- read_csv("data/brodhead_center.csv")
Wrangle Data with {dplyr}
{dplyr} verbs help you wrangle, clean, and normalize your data.
dplyr function | use for |
---|---|
select() | subset columns |
filter() | subset rows |
arrange() | sort rows by column variable values |
mutate() | create new variables or modify existing ones |
group_by() | use with summarize() for subtotals |
summarize() | generate column totals and subtotals, etc. |
count() | a specialized summarize() function |
Examples
First we need to load the {dplyr} package for wrangling and the {readr} package for importing CSV data. In our case, we’ll do that by loading the tidyverse, which loads {dplyr}, {readr}, and several other helpful packages. Then we need to load our data.
select()
# Keep only the `name` and `type` columns.
brodhead_center |>
  select(name, type)
name <chr> | type <chr> | |||
---|---|---|---|---|
Devils Krafthouse | bar and grill | |||
Devils Krafthouse | bar and grill | |||
Devils Krafthouse | bar and grill | |||
Devils Krafthouse | bar and grill | |||
Devils Krafthouse | bar and grill | |||
Devils Krafthouse | bar and grill | |||
Devils Krafthouse | bar and grill | |||
Devils Krafthouse | bar and grill | |||
Devils Krafthouse | bar and grill | |||
Devils Krafthouse | bar and grill |
filter()
# Keep only the rows whose `menuType` is "dessert".
brodhead_center |>
  filter(menuType == "dessert")
name <chr> | type <chr> | menuType <chr> | itemType <chr> | |
---|---|---|---|---|
Devils Krafthouse | bar and grill | dessert | dessert | |
Devils Krafthouse | bar and grill | dessert | dessert | |
Devils Krafthouse | bar and grill | dessert | dessert | |
Cafe | cafe | dessert | dessert | |
Cafe | cafe | dessert | dessert | |
Cafe | cafe | dessert | dessert | |
Cafe | cafe | dessert | dessert |
arrange()
# Sort the rows by `cost` (arrange() sorts in ascending order by default).
brodhead_center |>
  arrange(cost)
name <chr> | type <chr> | menuType <chr> | itemType <chr> | |
---|---|---|---|---|
Tandoor | restaurant | appetizer | bread | |
Tandoor | restaurant | appetizer | bread | |
Tandoor | restaurant | appetizer | bread | |
Devils Krafthouse | bar and grill | appetizer | snack | |
Devils Krafthouse | bar and grill | appetizer | snack | |
Devils Krafthouse | bar and grill | appetizer | snack | |
Devils Krafthouse | bar and grill | side | soup | |
Tandoor | restaurant | appetizer | appetizer | |
Tandoor | restaurant | appetizer | appetizer | |
Tandoor | restaurant | appetizer | appetizer |
mutate()
# Add a `ratings_high` column holding twice each row's `rating`.
brodhead_center |>
  mutate(ratings_high = rating * 2)
name <chr> | type <chr> | menuType <chr> | itemType <chr> | |
---|---|---|---|---|
Devils Krafthouse | bar and grill | appetizer | snack | |
Devils Krafthouse | bar and grill | appetizer | snack | |
Devils Krafthouse | bar and grill | appetizer | snack | |
Devils Krafthouse | bar and grill | appetizer | snack | |
Devils Krafthouse | bar and grill | appetizer | snack | |
Devils Krafthouse | bar and grill | appetizer | snack | |
Devils Krafthouse | bar and grill | appetizer | snack | |
Devils Krafthouse | bar and grill | appetizer | snack | |
Devils Krafthouse | bar and grill | entree | sandwich | |
Devils Krafthouse | bar and grill | entree | sandwich |
We can also mutate data by groups or categories
# Add the mean `rating` (ignoring missing values) computed within each `name`
# group, and place the new column immediately after `name`.
brodhead_center |>
  mutate(
    avg_item_rating_rest = mean(rating, na.rm = TRUE),
    .by = name,
    .after = name
  )
name <chr> | avg_item_rating_rest <dbl> | type <chr> | |
---|---|---|---|
Devils Krafthouse | 6.65625 | bar and grill | |
Devils Krafthouse | 6.65625 | bar and grill | |
Devils Krafthouse | 6.65625 | bar and grill | |
Devils Krafthouse | 6.65625 | bar and grill | |
Devils Krafthouse | 6.65625 | bar and grill | |
Devils Krafthouse | 6.65625 | bar and grill | |
Devils Krafthouse | 6.65625 | bar and grill | |
Devils Krafthouse | 6.65625 | bar and grill | |
Devils Krafthouse | 6.65625 | bar and grill | |
Devils Krafthouse | 6.65625 | bar and grill |
count()
Count values in a group | |
---|---|
menuType | n |
entree | 24 |
appetizer | 23 |
dessert | 7 |
side | 5 |
# Count the number of rows for each distinct `menuType`.
brodhead_center |>
  count(menuType)
menuType <chr> | n <int> | |||
---|---|---|---|---|
appetizer | 23 | |||
dessert | 7 | |||
entree | 24 | |||
side | 5 |
group_by() & summarise()
Summarise column |
---|
Sum_of_cost |
412 |
# Group rows by `name`, then collapse each group to its minimum, mean, and
# maximum `cost`.
brodhead_center |>
  group_by(name) |>
  summarise(
    min_cost = min(cost),
    mean_cost = mean(cost),
    max_cost = max(cost)
  )
name <chr> | min_cost <dbl> | mean_cost <dbl> | max_cost <dbl> | |
---|---|---|---|---|
Cafe | 5 | 6.500000 | 8 | |
Devils Krafthouse | 4 | 7.500000 | 10 | |
Tandoor | 2 | 6.315789 | 12 |
or
Summarize by groups, without group_by()
# Per-`name` minimum `cost`, using summarise()'s `.by` argument instead of a
# separate group_by() step.
brodhead_center |>
  summarise(min_cost = min(cost), .by = name)
name <chr> | min_cost <dbl> | |||
---|---|---|---|---|
Devils Krafthouse | 4 | |||
Tandoor | 2 | |||
Cafe | 5 |