# Visual meditations on house prices, Part 5: distributions

OVER THE PAST THREE MONTHS I HAVE MADE several new house price visualizations. In these meditations I’ll consider some recent graphs and provide R code for them. For reference, prior meditations are available at:

# Meditation 2: Changes in the Distribution of House Price Appreciation

The NAR data only goes back to 2015Q2, but how has the metro level distribution of house prices changed over the last 10 years or so? In this section we’ll consider a graph I constructed using the Freddie Mac House Price Index (FMHPI), which is available to the public on Freddie Mac’s webpage and goes back to the 1970s for over 300 metro areas.

The data I’m going to use is an updated version of the file fmhpi2.txt I described in Part 1: data wrangling.

For the animation we’ll also be using the tweenr package, which I’ve written about before. See my earlier post about tweenr for an introduction, and more examples here and here.

# Load the metro-level house price index file and derive 12-month house
# price appreciation (hpa12) for every metro.
metrodata <- fread("data/fmhpi4.txt")  # updated fmhpi file
metrodata$date <- as.Date(metrodata$date, format = "%m/%d/%Y")

# hpa12 = hpi / (hpi twelve rows earlier) - 1, computed within each metro.
# shift() always returns a vector as long as the group and pads the first
# 12 rows with NA. The earlier c(rep(NA, 12), diff(hpi, 12) / hpi) form
# recycled the length n-12 diff against the length n hpi and therefore
# produced n + 12 values for a group of n rows.
# NOTE(review): assumes rows are monthly and in ascending date order within
# each metro -- confirm against the input file.
metrodata[, hpa12 := hpi / shift(hpi, 12) - 1, by = metro]

# mdata is the table read by myf()
mdata <- metrodata

# Build the data behind a "dot histogram" of annual house price growth for
# a single date.
#
# mydate: the date to select from the global data.table `mdata`
#         (compared against its `date` column).
#
# Returns a data.frame with one row per selected metro and the columns
#   date    - the date, converted to a factor
#   vbin    - midpoint of the histogram bin the metro's hpa12 falls in
#             (bins are 0.005 wide, from -0.45 to 62)
#   counter - 1, 2, 3, ... position of the metro inside its bin, i.e. the
#             height at which its dot is stacked
#   hpa12, state, region, metro - carried over from `mdata`
myf <- function(mydate) {
  # hist() silently drops non-finite values, so rows without a usable hpa12
  # are removed up front; otherwise the per-bin counts below would no
  # longer line up with the rows of d.
  d <- mdata[date == mydate & is.finite(hpa12), ]
  d <- d[order(hpa12), ]

  myhist <- hist(d$hpa12, plot = FALSE, breaks = seq(-.45, 62, .005))

  # d is sorted by hpa12 and the bins are in increasing order, so the first
  # counts[1] rows belong to bin 1, the next counts[2] rows to bin 2, etc.
  # sequence() restarts the running count at 1 for every non-empty bin and
  # contributes nothing for empty bins.
  d[, counter := as.numeric(sequence(myhist$counts))]
  d[, vbin := rep(myhist$mids, times = myhist$counts)]

  d$date <- factor(d$date)
  as.data.frame(d[, list(date, vbin, counter, hpa12, state, region, metro)])
}

#create a plot using our function

# Dot histogram for June 2016: one dot per metro, stacked within each bin.
ggplot(data = myf(unique(metrodata[year == 2016 & month == 6, ]$date)),
       aes(x = vbin, y = counter, label = metro)) +
  geom_point(size = 1.5, color = "#00B0F0") +
  theme_minimal() +
  labs(x = "Annual House Price Growth (Y/Y % change, NSA index)",
       y = "Count of Metros",
       title = "Metro House Price Distribution",
       caption = "@lenkiefer Source: Freddie Mac House Price Index. Each dot a metro area",
       subtitle = format(as.Date("2016-06-01"), "%b-%Y")) +
  coord_cartesian(xlim = c(-0.2, .2), ylim = c(0, 35)) +
  theme(plot.title = element_text(size = 16)) +
  scale_x_continuous(labels = percent, breaks = seq(-.4, .4, .1)) +
  theme(plot.caption = element_text(hjust = 0, vjust = 1, margin = margin(t = 10))) +
  theme(legend.justification = c(0, 0), legend.position = "top")

## Adding animation

We want to compare how the distribution of annual house price growth has shifted from 2006 to 2016. We’ll compare the annual appreciation in June of each year. We’ll also use tweenr to have the dots smoothly transition between years.

# The assignment below must sit on its own line: when it trailed the "##"
# heading on the same line it was treated as part of the comment and dlist
# was never created.
dlist <- unique(metrodata[year > 2005 & month == 6, ]$date)  #generate a list of dates

# The earliest date is appended again so the list of states ends on the
# same state it starts with.
my.list2 <- lapply(c(dlist, min(dlist)), myf)  #apply our function to our list of dates

# Use tweenr to interpolate between the per-date states in my.list2
# (200 frames, cubic-in-out easing), drop duplicated rows, and keep the
# result as a data.table so frames can be selected with tf[.frame == i].
tf <- data.table(
  unique(
    tween_states(
      my.list2,
      tweenlength = 3,
      statelength = 1,
      ease = rep("cubic-in-out", 2),
      nframes = 200
    )
  )
)

# Render the tweened frames to an animated GIF: one dot histogram per frame.
oopt <- ani.options(interval = 0.15)  # keep the previous options to restore them below
saveGIF({
  for (i in seq_len(max(tf$.frame))) {  #loop over frames
    frame_data <- tf[.frame == i, ]
    # The plot assignment has to start on its own line; when it followed
    # the "#loop over frames" comment on the same line it was commented out
    # and g was never created.
    g <- ggplot(data = frame_data, aes(x = vbin, y = counter, label = metro)) +
      geom_point(size = 1.5, alpha = 0.75, color = "#00B0F0") +
      theme_minimal() +
      labs(x = "Annual House Price Growth (Y/Y % change, NSA index)",
           y = "Count of Metros",
           title = "Metro House Price Distribution",
           caption = "@lenkiefer Source: Freddie Mac House Price Index. Each dot a metro area",
           subtitle = unique(format(as.Date(frame_data$date), "%b-%Y"))) +
      coord_cartesian(xlim = c(-0.4, .4), ylim = c(0, 41)) +
      theme(plot.title = element_text(size = 16)) +
      scale_x_continuous(labels = percent, breaks = seq(-.4, .4, .1)) +
      theme(plot.caption = element_text(hjust = 0, vjust = 1, margin = margin(t = 10))) +
      theme(legend.justification = c(0, 0), legend.position = "top")
    print(g)
    ani.pause()
    print(i)  # progress indicator on the console
  }
}, movie.name = "hpi dot tween aug 12 2016 portland highlight.gif", ani.width = 700, ani.height = 500)
ani.options(oopt)  # restore the animation options that were in effect before

## Adding a time series plot, highlighting individual metros

We can use multiplot again to combine the histogram with a line plot. In this case, we’ll loop through all the metro areas and compare the current year-over-year appreciation to the history of that metro from 2000 through 2016 (June). Code for this plot follows:

# Animated GIF with one frame per metro: a line chart of the metro's index
# history since 2000 above the June 2016 dot histogram, with the current
# metro highlighted in red in both panels.
d.out <- myf(as.Date("2016-06-01"))  #we'll just plot the data for June 2016
d.out <- data.table(d.out)  #make it a data table for ease of use
mlist0 <- unique(metrodata$metro)  #generate a list of metros

# The statements below each need their own line: when they trailed the
# "#generate a list of metros" comment on the same line they were commented
# out, so the animation options, the saveGIF() call and g were never set up.
oopt <- ani.options(interval = 0.25)  # keep the previous options to restore them below
saveGIF({
  # Layers that do not depend on the highlighted metro are built once.
  dot_base <- ggplot(data = d.out, aes(x = vbin, y = counter, label = metro)) +
    geom_point(size = 1.5, alpha = 0.75, color = "#00B0F0") +
    theme_minimal() +
    labs(x = "Annual House Price Growth (Y/Y % change, NSA index)",
         y = "Count of Metros",
         title = "Metro House Price Appreciation Distribution",
         caption = "@lenkiefer Source: Freddie Mac House Price Index. Each dot/line a metro area",
         subtitle = paste("(Y/Y % change) in", unique(format(as.Date(d.out$date), "%b-%Y")))) +
    coord_cartesian(xlim = c(-0.1, .20), ylim = c(0, 35)) +
    theme(plot.title = element_text(size = 16)) +
    scale_x_continuous(labels = percent, breaks = seq(-.4, .4, .1)) +
    theme(plot.caption = element_text(hjust = 0, vjust = 1, margin = margin(t = 10))) +
    theme(legend.justification = c(0, 0), legend.position = "top")

  line_base <- ggplot(data = metrodata[year > 1999, ], aes(x = date, y = hpi, group = metro)) +
    geom_line(color = "gray", alpha = 0.75) +
    theme_minimal() +
    labs(x = "", y = "House Price Index (Dec 2000=100, NSA)",
         title = "Metro House Price Trends Jan 2000-Jun 2016") +
    theme(plot.subtitle = element_text(color = "red"))

  for (i in seq_along(mlist0)) {  #loop over metros
    highlight <- d.out[metro == mlist0[i]]

    # dot histogram with the current metro labelled and marked in red
    g <- dot_base +
      geom_text(data = highlight, color = "red", aes(y = 0), size = 3) +
      geom_point(data = highlight, color = "red")

    #now make a time series plot
    g2 <- line_base +
      labs(subtitle = mlist0[i]) +
      geom_line(data = metrodata[year > 1999 & metro == mlist0[i], ],
                color = "red", size = 1.2)  #highlight the metro we want

    # NOTE(review): multiplot() is defined outside this section; it is
    # presumably the helper that draws both plots on one page -- confirm
    # whether print(m) is still needed.
    m <- multiplot(g2, g)

    print(m)
    ani.pause()
    print(i)  # progress indicator on the console
  }
}, movie.name = "hpi dot combo dot line aug 2016.gif", ani.width = 700, ani.height = 700)
ani.options(oopt)  # restore the animation options that were in effect before

Check out the other visual meditations on house prices