# Daily ride counts
library(dplyr)
library(ggplot2)
# Count taxi rides per pickup day: one row per pickup_date, count in n_rides
daily_count <- summarise(group_by(tx, pickup_date), n_rides = n())

# Line plot of n_rides against pickup_date
ggplot(data = daily_count, mapping = aes(x = pickup_date, y = n_rides)) +
  geom_line()
# Distribution of cab fare amount
library(ggplot2)
# Histogram of total_amount with the x axis on a log10 scale
ggplot(data = tx, mapping = aes(x = total_amount)) +
  geom_histogram() +
  scale_x_log10()
# Distribution of payment type
library(ggplot2)
# Bar chart with one bar per payment_type value (bar height = row count)
ggplot(data = tx, mapping = aes(x = payment_type)) +
  geom_bar()
# Relationship between trip duration and total fare
library(ggplot2)
# Hexagon-binned plot of total_amount (y) against trip_duration (x),
# using 75 bins and log10 scales on both axes
ggplot(data = tx, mapping = aes(x = trip_duration, y = total_amount)) +
  geom_hex(bins = 75) +
  scale_x_log10() +
  scale_y_log10()
# Faceting daily rides
library(dplyr)
library(ggplot2)
# Ride counts per pickup date, pickup day of week and payment type,
# restricted to rides paid by "Card" or "Cash"
daily_count <- summarise(
  group_by(
    filter(tx, payment_type %in% c("Card", "Cash")),
    pickup_date, pickup_dow, payment_type
  ),
  n_rides = n()
)

# Scatter of daily counts in a grid: payment type in rows, day of week in
# columns, with a fixed y/x aspect ratio of 0.4
ggplot(data = daily_count, mapping = aes(x = pickup_date, y = n_rides)) +
  geom_point() +
  facet_grid(payment_type ~ pickup_dow) +
  coord_fixed(ratio = 0.4)
# Tip amount distribution faceted by payment type
library(ggplot2)
# Histogram of tip_amount, one panel per payment type stacked in a single
# column with independent y scales. 0.01 is added before the log10 transform
# so that zero tips stay on the axis.
ggplot(data = tx, mapping = aes(x = tip_amount + 0.01)) +
  geom_histogram() +
  scale_x_log10() +
  facet_wrap(~ payment_type, ncol = 1, scales = "free_y")
# Comparing fare distribution by payment type
library(ggplot2)
library(dplyr)
library(tidyr)
# Long-format comparison data: for every ride keep the total with and without
# tip, then stack both into (amount_type, amount) pairs keyed by payment_type
amount_compare <- gather(
  select(
    mutate(tx_pop, total_no_tip = total_amount - tip_amount),
    total_amount, total_no_tip, payment_type
  ),
  key = amount_type,
  value = amount,
  -payment_type
)

# Quantile plot against the uniform distribution, coloured by payment type,
# one panel per amount type, y axis limited to 3..20
ggplot(
  data = amount_compare,
  mapping = aes(sample = amount, color = payment_type)
) +
  geom_qq(distribution = stats::qunif, shape = 21) +
  facet_wrap(~ amount_type) +
  ylim(3, 20)
# Trelliscope faceting gapminder by country
library(ggplot2)
library(trelliscopejs)
# Line plot of lifeExp against year, with one Trelliscope panel per
# country/continent combination
ggplot(data = gapminder, mapping = aes(x = year, y = lifeExp)) +
  geom_line() +
  facet_trelliscope(~ country + continent)
///////////////////
library(ggplot2)
library(trelliscopejs)
# Same per-country line display, now with an explicit display name and
# description and a 1 x 2 panel layout per page
ggplot(data = gapminder, mapping = aes(x = year, y = lifeExp)) +
  geom_line() +
  facet_trelliscope(
    ~ country + continent,
    name = "lifeExp_by_country",
    desc = "Life expectancy vs. year per country",
    nrow = 1,
    ncol = 2
  )
//3
# Customizing the gapminder display
library(trelliscopejs)
library(ggplot2)
# Points plus a linear-model fit (no confidence band) of lifeExp against
# year, one Trelliscope panel per country/continent combination
ggplot(data = gapminder, mapping = aes(x = year, y = lifeExp)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_trelliscope(
    ~ country + continent,
    name = "lifeExp_by_country",
    desc = "Life expectancy vs. year for 142 countries.",
    nrow = 1,
    ncol = 2,
    # Axis scaling mode applied across panels
    scales = "sliced",
    # Have trelliscopejs compute cognostics automatically
    auto_cog = TRUE
  )
//3
# Adding custom cognostics
library(ggplot2)
library(dplyr)
library(gapminder)
library(trelliscopejs)
# Replace every space in x with a dash; non-character input (e.g. a factor)
# is coerced to character by gsub()
space_to_dash <- function(x) {
  gsub(pattern = " ", replacement = "-", x = x, fixed = TRUE)
}
# Group by country and create the two new variables
gap <- gapminder %>%
  group_by(country) %>%
  mutate(
    # Difference between the last and first lifeExp row of each country
    # (assumes rows are ordered by year within country -- TODO confirm)
    delta_lifeExp = tail(lifeExp, 1) - head(lifeExp, 1),
    # Per-country link built from the country name with spaces dashed
    ihme_link = paste0("http://www.healthdata.org/", space_to_dash(country))
  )

# Add the description. The literal must stay on a single source line:
# wrapping it inside the quotes embeds a newline in the description text.
gap$delta_lifeExp <- cog(
  gap$delta_lifeExp,
  desc = "Overall change in life expectancy"
)

# Specify the default label
gap$ihme_link <- cog(gap$ihme_link, default_label = TRUE)
# Scatter of lifeExp against year from the augmented gap data, one
# Trelliscope panel per country/continent combination
ggplot(data = gap, mapping = aes(x = year, y = lifeExp)) +
  geom_point() +
  facet_trelliscope(
    ~ country + continent,
    name = "lifeExp_by_country",
    desc = "Life expectancy vs. year.",
    nrow = 1,
    ncol = 2,
    scales = c("same", "sliced")
  )
//1
# Grouping and nesting
library(dplyr)
library(tidyr)
# Collapse stocks to one row per symbol, nesting the remaining columns
by_symbol <- nest(group_by(stocks, symbol))
///////////
//////////2
library(dplyr)
library(tidyr)
library(purrr)
# One row per symbol; the other columns of stocks are nested per symbol
by_symbol <- nest(group_by(stocks, symbol))
# Smallest value of the volume element/column of x
min_volume_fn <- function(x) {
  volumes <- x$volume
  min(volumes)
}
# Add min_volume: min_volume_fn applied to each nested data frame,
# collected as a double vector
by_symbol_min <- mutate(
  by_symbol,
  min_volume = map_dbl(data, min_volume_fn)
)
# Stock price display
library(trelliscopejs)
library(dplyr)
library(purrr)
library(plotly)
# Build a plotly "ohlc" chart from d: date on the x axis, and the open,
# close, high and low columns mapped to the matching OHLC attributes
ohlc_plot <- function(d) {
  plot_ly(
    data = d,
    type = "ohlc",
    x = ~date,
    open = ~open,
    close = ~close,
    high = ~high,
    low = ~low
  )
}
# Add a panel column holding one OHLC plot per nested data frame
by_symbol_plot <- mutate(by_symbol, panel = map_plot(data, ohlc_plot))
////////////////
//3
////////////////
library(trelliscopejs)
library(dplyr)
library(purrr)
library(plotly)
# Open-high-low-close chart for one data frame d, drawn with plotly:
# x is the date column; open/close/high/low come from the same-named columns
ohlc_plot <- function(d) {
  plot_ly(
    data = d,
    type = "ohlc",
    x = ~date,
    open = ~open,
    close = ~close,
    high = ~high,
    low = ~low
  )
}
# Attach one OHLC panel per symbol, then build the Trelliscope display
by_symbol_plot <- mutate(by_symbol, panel = map_plot(data, ohlc_plot))

trelliscope(by_symbol_plot, name = "ohlc_top500")
//2
# Adding cognostics
library(trelliscopejs)
library(dplyr)
# Add market_cap_log: log10 of market_cap wrapped as a cognostic with a
# description
by_symbol <- by_symbol %>%
  mutate(
    market_cap_log = cog(
      val = log10(market_cap),
      desc = "log base 10 market capitalization"
    )
  )
# Cognostics from nested data frames
library(trelliscopejs)
library(dplyr)
library(purrr)
# Percentage change from the first open value to the last close value of x
annual_return <- function(x) {
  first_open <- head(x$open, 1)
  last_close <- tail(x$close, 1)
  100 * (last_close - first_open) / first_open
}
# Compute by_symbol_avg: add a stats list-column holding, for each nested
# data frame, a one-row tibble of summary values.
# tibble() replaces the deprecated data_frame(); dplyr re-exports it, so no
# additional library() call is required.
by_symbol_avg <- mutate(
  by_symbol,
  stats = map(data, function(x) {
    tibble(
      mean_close = mean(x$close),
      mean_volume = mean(x$volume),
      annual_return = annual_return(x)
    )
  })
)
//4
# Customizing the stock display
library(trelliscopejs)
# Build the Trelliscope display from by_symbol with 600 x 300 panels
p <- trelliscope(
  by_symbol,
  width = 600,
  height = 300,
  name = "ohlc_top500",
  desc = "a plot"
)

# Auto-print the display object to view it
p
# Visualizing Pokemon
library(dplyr)
library(trelliscopejs)
# Plotting data: keep five columns of the pokemon data frame (the first
# `pokemon` below is the data frame, the second is its name column), then
# turn the name into the default-label cognostic and build image panels
# from url_image
pokemon2 <- mutate(
  select(pokemon, pokemon, type_1, attack, generation_id, url_image),
  pokemon = cog(val = pokemon, default_label = TRUE),
  panel = img_panel(url_image)
)

# Display with a 3 x 6 grid of panels per page
trelliscope(pokemon2, name = "pokemon", nrow = 3, ncol = 6)
//1
# Number of daily rides
library(dplyr)
library(ggplot2)
# Row count of bike per start_day / weekday combination, stored in n
daily <- summarise(group_by(bike, start_day, weekday), n = n())

# Scatter of the daily counts, coloured by weekday
ggplot(data = daily, mapping = aes(x = start_day, y = n, color = weekday)) +
  geom_point()
# Examining time-of-day
library(dplyr)
library(ggplot2)
# Row count of bike per start week, start hour-of-day and weekday
week_hod <- summarise(
  group_by(bike, start_wk, start_hod, weekday),
  n = n()
)

# Counts against start_wk, one column of panels per start_hod,
# with a square-root y scale
ggplot(data = week_hod, mapping = aes(x = start_wk, y = n, color = weekday)) +
  geom_point() +
  facet_grid(~ start_hod) +
  scale_y_sqrt()
# Effect of membership and weekday
library(dplyr)
library(ggplot2)
# Row count of bike per start week, start hour-of-day, weekday and membership
wk_memb_hod <- summarise(
  group_by(bike, start_wk, start_hod, weekday, membership),
  n = n()
)

# Counts against start_wk in a grid: membership in rows, start_hod in
# columns, with a square-root y scale
ggplot(
  data = wk_memb_hod,
  mapping = aes(x = start_wk, y = n, color = weekday)
) +
  geom_point() +
  facet_grid(membership ~ start_hod) +
  scale_y_sqrt()
# Daily plots
library(dplyr)
library(ggplot2)
# Rows with start_mon == 5, counted per start day, start hour-of-day and
# membership
daily_may <- summarise(
  group_by(
    filter(bike, start_mon == 5),
    start_day, start_hod, membership
  ),
  n = n()
)

# Counts by hour-of-day, coloured by membership, one panel per start_day
# laid out seven panels per row
ggplot(
  data = daily_may,
  mapping = aes(x = start_hod, y = n, color = membership)
) +
  geom_point() +
  facet_wrap(~ start_day, ncol = 7)
# Looking at all days
library(ggplot2)
# Counts by hour-of-day, coloured by membership, as a Trelliscope display
# with one panel per start_day in a 3 x 7 grid per page
ggplot(
  data = daily_may,
  mapping = aes(x = start_hod, y = n, color = membership)
) +
  geom_point() +
  facet_trelliscope(~ start_day, nrow = 3, ncol = 7)
# Augmenting the Data: Route Summary Statistics
library(trelliscopejs)
library(ggplot2)
library(dplyr)
# Build a Google Maps directions URL (travel mode "bicycling") from a start
# and an end coordinate pair. Arguments are pasted in as-is, so vector
# inputs yield one URL per element.
make_gmap_url <- function(start_lat, start_lon, end_lat, end_lon) {
  base_url <- "https://www.google.com/maps/dir/?api=1"
  travel_mode <- "&travelmode=bicycling"
  paste0(
    base_url,
    "&origin=", start_lat, ",", start_lon,
    "&destination=", end_lat, ",", end_lon,
    travel_mode
  )
}
# Per route (start/end station code pair) add:
#   tot_rides    - sum of n over the route's rows
#   weekday_diff - mean n on "workweek" rows minus mean n on "weekend" rows
#   map_url      - Google Maps cycling-directions link for the route
route_hod_updated <- mutate(
  group_by(route_hod, start_station_code, end_station_code),
  tot_rides = sum(n),
  weekday_diff = mean(n[weekday == "workweek"]) -
    mean(n[weekday == "weekend"]),
  map_url = make_gmap_url(start_lat, start_lon, end_lat, end_lon)
)
# Visualizing the Data: Counts by Hour-of-Day
library(trelliscopejs)
library(ggplot2)
# Counts by hour-of-day, coloured by weekday, one Trelliscope panel per
# start/end station name pair in a 2 x 4 grid, with the legend hidden
ggplot(
  data = route_hod,
  mapping = aes(x = start_hod, y = n, color = weekday)
) +
  geom_point(size = 3) +
  facet_trelliscope(
    ~ start_station_name + end_station_name,
    nrow = 2,
    ncol = 4
  ) +
  theme(legend.position = "none")
//3