Bivariate tilegridmaps with R

I HAVE BEEN EXPERIMENTING WITH A NEW WAY TO VISUALIZE DATA, a bivariate tilegridmap. When I get around to rolling out my tidyPowerPoint workflow weโ€™re going to want something other than bars and lines to fill it up. A graph like this might be a fun option.

Weโ€™ll build one, but first, just let me show you one I tweeted earlier today:

In this post, letโ€™s go over how to make this plot with R. If youโ€™re interested in these plots, you might be interested in this post on making a bivariate choropleth map with R. That post explains the color scales and opacity weโ€™ll use to make our plot.

Get some data

Weโ€™ll exploit the same technique we used yesterday to get some state data from the Saint Louis Federal Reserveโ€™s FRED database.

#####################################################################################
## Step 1: Load Libraries ##
#####################################################################################
library(tidyquant)
library(tibbletime)
library(geofacet)
library(viridis)
library(scales)
library(cowplot)   # to arrange plots

#####################################################################################
## Step 2: go get data ##
#####################################################################################

tickers<-c(paste0(us_state_grid3$code,"NA"),      # nonfarm employment is NA
           paste0(us_state_grid3$code,"STHPI"))   # FHFA house price index

df<-tq_get(tickers,get="economic.data",from="2010-01-01")

#####################################################################################
## Organize the data ##
#####################################################################################

df<-mutate(df,
           state = substr(symbol,1,2), 
           var   = substr(symbol,3,10)) 

# Create rolling mean function using tibbletime::rollify
rolling_mean <- rollify(mean, window = 3)

# Create employment data
df %>% as_tbl_time(index=date) %>%
  filter(var=="NA") %>% group_by(state) %>%
  mutate(emp_ma3=rolling_mean(price)) %>%
  mutate(eg=Delt(emp_ma3,k=12))-> df.emp

# Create house price data
df %>% as_tbl_time(index=date) %>% 
  filter(var=="STHPI") %>% 
  group_by(state) %>%
  mutate(hpa=Delt(price,k=4)) -> df.hpi

# Create combined data
dfb <- left_join(select(df.hpi,date,state,hpa), 
                 select(df.emp,date,state,eg),
                 by=c("date","state"))  %>% 
  filter(!is.na(eg) & !is.na(hpa))

Get the data ready

In order to construct our bivariate color scheme we need to compute terciles of our x and y variable.

# Compute quantiles:

dfb %>% group_by(date) %>%
  mutate(eg0=quantile(eg,0,na.rm=T),
         eg1=quantile(eg,0.33,na.rm=T),
         eg2=quantile(eg,0.66,na.rm=T),
         eg3=quantile(eg,1,na.rm=T),
         hpa0=quantile(hpa,0,na.rm=T),
         hpa1=quantile(hpa,0.33,na.rm=T),
         hpa2=quantile(hpa,0.66,na.rm=T),
         hpa3=quantile(hpa,1,na.rm=T)
         )  %>%
  mutate(x=ifelse(eg<eg1,1,ifelse(eg<eg2,2,3)),
         y=ifelse(hpa<hpa1,1,ifelse(hpa<hpa2,2,3))
         )-> dfb

dfq<- dfb %>% group_by(date) %>%
  summarize(eg0=quantile(eg,0,na.rm=T),
            eg1=quantile(eg,0.33,na.rm=T),
            eg2=quantile(eg,0.66,na.rm=T),
            eg3=quantile(eg,1,na.rm=T),
            hpa0=quantile(hpa,0,na.rm=T),
            hpa1=quantile(hpa,0.33,na.rm=T),
            hpa2=quantile(hpa,0.66,na.rm=T),
            hpa3=quantile(hpa,1,na.rm=T),
            eg=0,hpa=0,state="Legend",
            x=1,y=1) %>% 
  filter(date==max(dfb$date))

Make the plot

This code creates our plot. (gonna be loooong, got to be big to fit everything inโ€ฆstill gotta see if wecan reduce it.)

# just get last month (quarter) of data
dfb2<-filter(dfb,date==max(dfb$date))

dfq<- dfb2 %>%
    group_by(date) %>%
    summarize(eg0=quantile(eg,0,na.rm=T),
              eg1=quantile(eg,0.33,na.rm=T),
              eg2=quantile(eg,0.66,na.rm=T),
              eg3=quantile(eg,1,na.rm=T),
              hpa0=quantile(hpa,0,na.rm=T),
              hpa1=quantile(hpa,0.33,na.rm=T),
              hpa2=quantile(hpa,0.66,na.rm=T),
              hpa3=quantile(hpa,1,na.rm=T),
              eg=0,hpa=0,state="Legend",
              x=1,y=1) 
  
  
gxy<-
  ggplot(data=dfb2,
         aes(x=eg,y=hpa,label=state,
             # see http://lenkiefer.com/2017/04/24/bivariate-map/
             # for explanation on the color/fill/alpha
             color=atan(y/x),
             fill=atan(y/x),
             alpha=x+y ))+
  
  # Shade areas based on the terciles
  geom_rect(data=dfq,aes(xmin=-Inf,xmax=eg1,ymin=hpa1,ymax=hpa2,fill=atan(2/1),alpha=3/2))+
  geom_rect(data=dfq,aes(xmin=eg1,xmax=eg2,ymin=hpa1,ymax=hpa2,fill=atan(2/2),alpha=4/2))+
  geom_rect(data=dfq,aes(xmin=eg2,xmax=Inf,ymin=hpa1,ymax=hpa2,fill=atan(2/3),alpha=5/2))+
  
  geom_rect(data=dfq,aes(xmin=-Inf,xmax=eg1,ymin=-Inf,ymax=hpa1,fill=atan(1/1),alpha=2/2))+
  geom_rect(data=dfq,aes(xmin=eg1,xmax=eg2,ymin=-Inf,ymax=hpa1,fill=atan(1/2),alpha=3/2))+
  geom_rect(data=dfq,aes(xmin=eg2,xmax=Inf,ymin=-Inf,ymax=hpa1,fill=atan(1/3),alpha=4/2))+
  
  geom_rect(data=dfq,aes(xmin=-Inf,xmax=eg1,ymin=hpa2,ymax=Inf,fill=atan(3/1),alpha=4/2))+
  geom_rect(data=dfq,aes(xmin=eg1,xmax=eg2,ymin=hpa2,ymax=Inf, fill=atan(3/2),alpha=5/2))+
  geom_rect(data=dfq,aes(xmin=eg2,xmax=Inf,ymin=hpa2,ymax=Inf, fill=atan(3/3),alpha=6/2))  +
  geom_point(color="gray",size=3,shape=21)+
  scale_color_viridis(option="D")+
  scale_fill_viridis(option="D")+
  theme(legend.position="none",
        plot.caption=element_text(hjust=0,size=8,face="italic"))+
  labs(x="employment growth", y="house price growth",
       title=paste0("In Q2 of ",max(year(dfb2$date))),
       caption="Sources: Employment, U.S. Bureau of Labor Statistics quarterly average of 12-month percent change in nonfarm employment (SA),\nHouse prices 4-quarter percent change in FHFA All-Transactions House Price Index (NSA)")+
  scale_x_continuous(labels=percent)+
  scale_y_continuous(labels=percent)
  
  
  
g.tile<-
  ggplot(data=dfb2,
         aes(x=eg,y=hpa,label=state,
           # see http://lenkiefer.com/2017/04/24/bivariate-map/
           # for explanation on the color/fill/alpha
           color=atan(y/x),       
           fill=atan(y/x),
           alpha=x+y ))+
  geom_rect(aes(xmin=-Inf,xmax=Inf,ymin=-Inf,ymax=Inf,
                alpha=ifelse(state!="Legend",x+y,0)),color=NA)+
  # Draw gridlines to demarcate the terciles on the scatterplot
  geom_segment(aes(x=eg0,xend=eg3,y=hpa0,yend=hpa0),color="lightgray")+
  geom_segment(aes(x=eg0,xend=eg3,y=hpa1,yend=hpa1),color="lightgray")+
  geom_segment(aes(x=eg0,xend=eg3,y=hpa2,yend=hpa2),color="lightgray")+
  geom_segment(aes(x=eg0,xend=eg3,y=hpa3,yend=hpa3),color="lightgray")+
  geom_segment(aes(x=eg0,xend=eg0,y=hpa0,yend=hpa3),color="lightgray")+  
  geom_segment(aes(x=eg1,xend=eg1,y=hpa0,yend=hpa3),color="lightgray")+
  geom_segment(aes(x=eg2,xend=eg2,y=hpa0,yend=hpa3),color="lightgray")+
  geom_segment(aes(x=eg3,xend=eg3,y=hpa0,yend=hpa3),color="lightgray")+
  
  # add a point to indicate where this state lies
  geom_point(alpha=1,color="black")+
  guides(fill=F,color=F,alpha=F)+
  scale_color_viridis(name="Color scale",option="D")+
  scale_fill_viridis(name="Color scale",option="D")+
  # Use geofacet::facet_geo to arrange the tiles
  facet_geo(~state, grid="us_state_grid1",label="code")  +
  theme(axis.text=element_blank(),
        plot.title=element_text(hjust=0,face="bold",size=18),
        plot.subtitle=element_text(hjust=0,face="italic",size=14),
        axis.ticks=element_blank(),
        axis.line=element_blank(),
        strip.background=element_blank(),
        panel.background=element_blank(),
        plot.background=element_rect(fill="white"),
        panel.grid.major = element_line(colour = NA),
        panel.grid.minor = element_line(colour = NA))+
  labs(x="",y="",title="Bivariate Tilegrid Map!",subtitle="@lenkiefer")

# Arrange out plots with cowplot::plot_grid
plot_grid(g.tile,
          gxy,
          ncol=1,
          rel_heights=c(5,2))

So maybe?

Not quite sure if this viz hits the mark, but I think there are some interesting ideas in here. Let me know what you think.