Bivariate tilegridmaps with R

I HAVE BEEN EXPERIMENTING WITH A NEW WAY TO VISUALIZE DATA, a bivariate tilegridmap. When I get around to rolling out my tidyPowerPoint workflow we’re going to want something other than bars and lines to fill it up. A graph like this might be a fun option.

We’ll build one, but first, just let me show you one I tweeted earlier today:

In this post, let’s go over how to make this plot with R. If you’re interested in these plots, you might be interested in this post on making a bivariate choropleth map with R. That post explains the color scales and opacity we’ll use to make our plot.

Get some data

We’ll exploit the same technique we used yesterday to get some state data from the Saint Louis Federal Reserve’s FRED database.

#####################################################################################
## Step 1: Load Libraries ##
#####################################################################################
library(tidyquant)
library(tibbletime)
library(geofacet)
library(viridis)
library(scales)
library(cowplot)   # to arrange plots

#####################################################################################
## Step 2: go get data ##
#####################################################################################

tickers<-c(paste0(us_state_grid3$code,"NA"),      # nonfarm employment is NA
           paste0(us_state_grid3$code,"STHPI"))   # FHFA house price index

df<-tq_get(tickers,get="economic.data",from="2010-01-01")

#####################################################################################
## Organize the data ##
#####################################################################################

df<-mutate(df,
           state = substr(symbol,1,2), 
           var   = substr(symbol,3,10)) 

# Create rolling mean function using tibbletime::rollify
rolling_mean <- rollify(mean, window = 3)

# Create employment data
df %>% as_tbl_time(index=date) %>%
  filter(var=="NA") %>% group_by(state) %>%
  mutate(emp_ma3=rolling_mean(price)) %>%
  mutate(eg=Delt(emp_ma3,k=12))-> df.emp

# Create house price data
df %>% as_tbl_time(index=date) %>% 
  filter(var=="STHPI") %>% 
  group_by(state) %>%
  mutate(hpa=Delt(price,k=4)) -> df.hpi

# Create combined data
dfb <- left_join(select(df.hpi,date,state,hpa), 
                 select(df.emp,date,state,eg),
                 by=c("date","state"))  %>% 
  filter(!is.na(eg) & !is.na(hpa))

Get the data ready

In order to construct our bivariate color scheme we need to compute terciles of our x and y variable.

# Compute quantiles:

dfb %>% group_by(date) %>%
  mutate(eg0=quantile(eg,0,na.rm=T),
         eg1=quantile(eg,0.33,na.rm=T),
         eg2=quantile(eg,0.66,na.rm=T),
         eg3=quantile(eg,1,na.rm=T),
         hpa0=quantile(hpa,0,na.rm=T),
         hpa1=quantile(hpa,0.33,na.rm=T),
         hpa2=quantile(hpa,0.66,na.rm=T),
         hpa3=quantile(hpa,1,na.rm=T)
         )  %>%
  mutate(x=ifelse(eg<eg1,1,ifelse(eg<eg2,2,3)),
         y=ifelse(hpa<hpa1,1,ifelse(hpa<hpa2,2,3))
         )-> dfb

dfq<- dfb %>% group_by(date) %>%
  summarize(eg0=quantile(eg,0,na.rm=T),
            eg1=quantile(eg,0.33,na.rm=T),
            eg2=quantile(eg,0.66,na.rm=T),
            eg3=quantile(eg,1,na.rm=T),
            hpa0=quantile(hpa,0,na.rm=T),
            hpa1=quantile(hpa,0.33,na.rm=T),
            hpa2=quantile(hpa,0.66,na.rm=T),
            hpa3=quantile(hpa,1,na.rm=T),
            eg=0,hpa=0,state="Legend",
            x=1,y=1) %>% 
  filter(date==max(dfb$date))

Make the plot

This code creates our plot. (gonna be loooong, got to be big to fit everything in…still gotta see if wecan reduce it.)

# just get last month (quarter) of data
dfb2<-filter(dfb,date==max(dfb$date))

dfq<- dfb2 %>%
    group_by(date) %>%
    summarize(eg0=quantile(eg,0,na.rm=T),
              eg1=quantile(eg,0.33,na.rm=T),
              eg2=quantile(eg,0.66,na.rm=T),
              eg3=quantile(eg,1,na.rm=T),
              hpa0=quantile(hpa,0,na.rm=T),
              hpa1=quantile(hpa,0.33,na.rm=T),
              hpa2=quantile(hpa,0.66,na.rm=T),
              hpa3=quantile(hpa,1,na.rm=T),
              eg=0,hpa=0,state="Legend",
              x=1,y=1) 
  
  
gxy<-
  ggplot(data=dfb2,
         aes(x=eg,y=hpa,label=state,
             # see http://lenkiefer.com/2017/04/24/bivariate-map/
             # for explanation on the color/fill/alpha
             color=atan(y/x),
             fill=atan(y/x),
             alpha=x+y ))+
  
  # Shade areas based on the terciles
  geom_rect(data=dfq,aes(xmin=-Inf,xmax=eg1,ymin=hpa1,ymax=hpa2,fill=atan(2/1),alpha=3/2))+
  geom_rect(data=dfq,aes(xmin=eg1,xmax=eg2,ymin=hpa1,ymax=hpa2,fill=atan(2/2),alpha=4/2))+
  geom_rect(data=dfq,aes(xmin=eg2,xmax=Inf,ymin=hpa1,ymax=hpa2,fill=atan(2/3),alpha=5/2))+
  
  geom_rect(data=dfq,aes(xmin=-Inf,xmax=eg1,ymin=-Inf,ymax=hpa1,fill=atan(1/1),alpha=2/2))+
  geom_rect(data=dfq,aes(xmin=eg1,xmax=eg2,ymin=-Inf,ymax=hpa1,fill=atan(1/2),alpha=3/2))+
  geom_rect(data=dfq,aes(xmin=eg2,xmax=Inf,ymin=-Inf,ymax=hpa1,fill=atan(1/3),alpha=4/2))+
  
  geom_rect(data=dfq,aes(xmin=-Inf,xmax=eg1,ymin=hpa2,ymax=Inf,fill=atan(3/1),alpha=4/2))+
  geom_rect(data=dfq,aes(xmin=eg1,xmax=eg2,ymin=hpa2,ymax=Inf, fill=atan(3/2),alpha=5/2))+
  geom_rect(data=dfq,aes(xmin=eg2,xmax=Inf,ymin=hpa2,ymax=Inf, fill=atan(3/3),alpha=6/2))  +
  geom_point(color="gray",size=3,shape=21)+
  scale_color_viridis(option="D")+
  scale_fill_viridis(option="D")+
  theme(legend.position="none",
        plot.caption=element_text(hjust=0,size=8,face="italic"))+
  labs(x="employment growth", y="house price growth",
       title=paste0("In Q2 of ",max(year(dfb2$date))),
       caption="Sources: Employment, U.S. Bureau of Labor Statistics quarterly average of 12-month percent change in nonfarm employment (SA),\nHouse prices 4-quarter percent change in FHFA All-Transactions House Price Index (NSA)")+
  scale_x_continuous(labels=percent)+
  scale_y_continuous(labels=percent)
  
  
  
g.tile<-
  ggplot(data=dfb2,
         aes(x=eg,y=hpa,label=state,
           # see http://lenkiefer.com/2017/04/24/bivariate-map/
           # for explanation on the color/fill/alpha
           color=atan(y/x),       
           fill=atan(y/x),
           alpha=x+y ))+
  geom_rect(aes(xmin=-Inf,xmax=Inf,ymin=-Inf,ymax=Inf,
                alpha=ifelse(state!="Legend",x+y,0)),color=NA)+
  # Draw gridlines to demarcate the terciles on the scatterplot
  geom_segment(aes(x=eg0,xend=eg3,y=hpa0,yend=hpa0),color="lightgray")+
  geom_segment(aes(x=eg0,xend=eg3,y=hpa1,yend=hpa1),color="lightgray")+
  geom_segment(aes(x=eg0,xend=eg3,y=hpa2,yend=hpa2),color="lightgray")+
  geom_segment(aes(x=eg0,xend=eg3,y=hpa3,yend=hpa3),color="lightgray")+
  geom_segment(aes(x=eg0,xend=eg0,y=hpa0,yend=hpa3),color="lightgray")+  
  geom_segment(aes(x=eg1,xend=eg1,y=hpa0,yend=hpa3),color="lightgray")+
  geom_segment(aes(x=eg2,xend=eg2,y=hpa0,yend=hpa3),color="lightgray")+
  geom_segment(aes(x=eg3,xend=eg3,y=hpa0,yend=hpa3),color="lightgray")+
  
  # add a point to indicate where this state lies
  geom_point(alpha=1,color="black")+
  guides(fill=F,color=F,alpha=F)+
  scale_color_viridis(name="Color scale",option="D")+
  scale_fill_viridis(name="Color scale",option="D")+
  # Use geofacet::facet_geo to arrange the tiles
  facet_geo(~state, grid="us_state_grid1",label="code")  +
  theme(axis.text=element_blank(),
        plot.title=element_text(hjust=0,face="bold",size=18),
        plot.subtitle=element_text(hjust=0,face="italic",size=14),
        axis.ticks=element_blank(),
        axis.line=element_blank(),
        strip.background=element_blank(),
        panel.background=element_blank(),
        plot.background=element_rect(fill="white"),
        panel.grid.major = element_line(colour = NA),
        panel.grid.minor = element_line(colour = NA))+
  labs(x="",y="",title="Bivariate Tilegrid Map!",subtitle="@lenkiefer")

# Arrange out plots with cowplot::plot_grid
plot_grid(g.tile,
          gxy,
          ncol=1,
          rel_heights=c(5,2))

So maybe?

Not quite sure if this viz hits the mark, but I think there are some interesting ideas in here. Let me know what you think.

 Share!