# Bivariate tilegridmaps with R

I HAVE BEEN EXPERIMENTING WITH A NEW WAY TO VISUALIZE DATA, a bivariate tilegridmap. When I get around to rolling out my tidyPowerPoint workflow we’re going to want something other than bars and lines to fill it up. A graph like this might be a fun option.

We’ll build one, but first, just let me show you one I tweeted earlier today:

In this post, let’s go over how to make this plot with R. If you’re interested in these plots, you might be interested in this post on making a bivariate choropleth map with R. That post explains the color scales and opacity we’ll use to make our plot.

# Get some data

We’ll exploit the same technique we used yesterday to get some state data from the Saint Louis Federal Reserve’s FRED database.

#####################################################################################
## Step 1: Load Libraries ##
#####################################################################################
library(tidyquant)
library(tibbletime)
library(geofacet)
library(viridis)
library(scales)
library(cowplot)   # to arrange plots

#####################################################################################
## Step 2: go get data ##
#####################################################################################

tickers<-c(paste0(us_state_grid3$code,"NA"), # nonfarm employment is NA paste0(us_state_grid3$code,"STHPI"))   # FHFA house price index

df<-tq_get(tickers,get="economic.data",from="2010-01-01")

#####################################################################################
## Organize the data ##
#####################################################################################

df<-mutate(df,
state = substr(symbol,1,2),
var   = substr(symbol,3,10))

# Create rolling mean function using tibbletime::rollify
rolling_mean <- rollify(mean, window = 3)

# Create employment data
df %>% as_tbl_time(index=date) %>%
filter(var=="NA") %>% group_by(state) %>%
mutate(emp_ma3=rolling_mean(price)) %>%
mutate(eg=Delt(emp_ma3,k=12))-> df.emp

# Create house price data
df %>% as_tbl_time(index=date) %>%
filter(var=="STHPI") %>%
group_by(state) %>%
mutate(hpa=Delt(price,k=4)) -> df.hpi

# Create combined data
dfb <- left_join(select(df.hpi,date,state,hpa),
select(df.emp,date,state,eg),
by=c("date","state"))  %>%
filter(!is.na(eg) & !is.na(hpa))

In order to construct our bivariate color scheme we need to compute terciles of our x and y variable.

# Compute quantiles:

dfb %>% group_by(date) %>%
mutate(eg0=quantile(eg,0,na.rm=T),
eg1=quantile(eg,0.33,na.rm=T),
eg2=quantile(eg,0.66,na.rm=T),
eg3=quantile(eg,1,na.rm=T),
hpa0=quantile(hpa,0,na.rm=T),
hpa1=quantile(hpa,0.33,na.rm=T),
hpa2=quantile(hpa,0.66,na.rm=T),
hpa3=quantile(hpa,1,na.rm=T)
)  %>%
mutate(x=ifelse(eg<eg1,1,ifelse(eg<eg2,2,3)),
y=ifelse(hpa<hpa1,1,ifelse(hpa<hpa2,2,3))
)-> dfb

dfq<- dfb %>% group_by(date) %>%
summarize(eg0=quantile(eg,0,na.rm=T),
eg1=quantile(eg,0.33,na.rm=T),
eg2=quantile(eg,0.66,na.rm=T),
eg3=quantile(eg,1,na.rm=T),
hpa0=quantile(hpa,0,na.rm=T),
hpa1=quantile(hpa,0.33,na.rm=T),
hpa2=quantile(hpa,0.66,na.rm=T),
hpa3=quantile(hpa,1,na.rm=T),
eg=0,hpa=0,state="Legend",
x=1,y=1) %>%
filter(date==max(dfb$date)) ## Make the plot This code creates our plot. (gonna be loooong, got to be big to fit everything in…still gotta see if wecan reduce it.) # just get last month (quarter) of data dfb2<-filter(dfb,date==max(dfb$date))

dfq<- dfb2 %>%
group_by(date) %>%
summarize(eg0=quantile(eg,0,na.rm=T),
eg1=quantile(eg,0.33,na.rm=T),
eg2=quantile(eg,0.66,na.rm=T),
eg3=quantile(eg,1,na.rm=T),
hpa0=quantile(hpa,0,na.rm=T),
hpa1=quantile(hpa,0.33,na.rm=T),
hpa2=quantile(hpa,0.66,na.rm=T),
hpa3=quantile(hpa,1,na.rm=T),
eg=0,hpa=0,state="Legend",
x=1,y=1)

gxy<-
ggplot(data=dfb2,
aes(x=eg,y=hpa,label=state,
# see http://lenkiefer.com/2017/04/24/bivariate-map/
# for explanation on the color/fill/alpha
color=atan(y/x),
fill=atan(y/x),
alpha=x+y ))+

# Shade areas based on the terciles
geom_rect(data=dfq,aes(xmin=-Inf,xmax=eg1,ymin=hpa1,ymax=hpa2,fill=atan(2/1),alpha=3/2))+
geom_rect(data=dfq,aes(xmin=eg1,xmax=eg2,ymin=hpa1,ymax=hpa2,fill=atan(2/2),alpha=4/2))+
geom_rect(data=dfq,aes(xmin=eg2,xmax=Inf,ymin=hpa1,ymax=hpa2,fill=atan(2/3),alpha=5/2))+

geom_rect(data=dfq,aes(xmin=-Inf,xmax=eg1,ymin=-Inf,ymax=hpa1,fill=atan(1/1),alpha=2/2))+
geom_rect(data=dfq,aes(xmin=eg1,xmax=eg2,ymin=-Inf,ymax=hpa1,fill=atan(1/2),alpha=3/2))+
geom_rect(data=dfq,aes(xmin=eg2,xmax=Inf,ymin=-Inf,ymax=hpa1,fill=atan(1/3),alpha=4/2))+

geom_rect(data=dfq,aes(xmin=-Inf,xmax=eg1,ymin=hpa2,ymax=Inf,fill=atan(3/1),alpha=4/2))+
geom_rect(data=dfq,aes(xmin=eg1,xmax=eg2,ymin=hpa2,ymax=Inf, fill=atan(3/2),alpha=5/2))+
geom_rect(data=dfq,aes(xmin=eg2,xmax=Inf,ymin=hpa2,ymax=Inf, fill=atan(3/3),alpha=6/2))  +
geom_point(color="gray",size=3,shape=21)+
scale_color_viridis(option="D")+
scale_fill_viridis(option="D")+
theme(legend.position="none",
plot.caption=element_text(hjust=0,size=8,face="italic"))+
labs(x="employment growth", y="house price growth",
title=paste0("In Q2 of ",max(year(dfb2\$date))),
caption="Sources: Employment, U.S. Bureau of Labor Statistics quarterly average of 12-month percent change in nonfarm employment (SA),\nHouse prices 4-quarter percent change in FHFA All-Transactions House Price Index (NSA)")+
scale_x_continuous(labels=percent)+
scale_y_continuous(labels=percent)

g.tile<-
ggplot(data=dfb2,
aes(x=eg,y=hpa,label=state,
# see http://lenkiefer.com/2017/04/24/bivariate-map/
# for explanation on the color/fill/alpha
color=atan(y/x),
fill=atan(y/x),
alpha=x+y ))+
geom_rect(aes(xmin=-Inf,xmax=Inf,ymin=-Inf,ymax=Inf,
alpha=ifelse(state!="Legend",x+y,0)),color=NA)+
# Draw gridlines to demarcate the terciles on the scatterplot
geom_segment(aes(x=eg0,xend=eg3,y=hpa0,yend=hpa0),color="lightgray")+
geom_segment(aes(x=eg0,xend=eg3,y=hpa1,yend=hpa1),color="lightgray")+
geom_segment(aes(x=eg0,xend=eg3,y=hpa2,yend=hpa2),color="lightgray")+
geom_segment(aes(x=eg0,xend=eg3,y=hpa3,yend=hpa3),color="lightgray")+
geom_segment(aes(x=eg0,xend=eg0,y=hpa0,yend=hpa3),color="lightgray")+
geom_segment(aes(x=eg1,xend=eg1,y=hpa0,yend=hpa3),color="lightgray")+
geom_segment(aes(x=eg2,xend=eg2,y=hpa0,yend=hpa3),color="lightgray")+
geom_segment(aes(x=eg3,xend=eg3,y=hpa0,yend=hpa3),color="lightgray")+

# add a point to indicate where this state lies
geom_point(alpha=1,color="black")+
guides(fill=F,color=F,alpha=F)+
scale_color_viridis(name="Color scale",option="D")+
scale_fill_viridis(name="Color scale",option="D")+
# Use geofacet::facet_geo to arrange the tiles
facet_geo(~state, grid="us_state_grid1",label="code")  +
theme(axis.text=element_blank(),
plot.title=element_text(hjust=0,face="bold",size=18),
plot.subtitle=element_text(hjust=0,face="italic",size=14),
axis.ticks=element_blank(),
axis.line=element_blank(),
strip.background=element_blank(),
panel.background=element_blank(),
plot.background=element_rect(fill="white"),
panel.grid.major = element_line(colour = NA),
panel.grid.minor = element_line(colour = NA))+
labs(x="",y="",title="Bivariate Tilegrid Map!",subtitle="@lenkiefer")

# Arrange out plots with cowplot::plot_grid
plot_grid(g.tile,
gxy,
ncol=1,
rel_heights=c(5,2))

### So maybe?

Not quite sure if this viz hits the mark, but I think there are some interesting ideas in here. Let me know what you think.