Plot maps with base mapping tools and ggmap in R

Plot maps with ‘base’ mapping tools in R

Understanding what kind of data you have (polygons or points?) and what you want to map is pivotal to start your mapping.

  1. First you need a shapefile of the area you want to plot, such as metropolitan France. There are various resources where to get them from: DIVA-GIS and EUROSTAT are those that I use the most. It’s always important to have a .prj file included, as your final map ‘should’ be projecte. I say “should” as sometimes it is just not possible, especially if you work with historical maps.
  2. Upload libraries

Load and prepare data

setwd(paste(mypath))
fr.prj <- readOGR(".", "FRA_adm2")
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "FRA_adm2"
## with 96 features
## It has 18 fields
## NOTE: rgdal::checkCRSArgs: no proj_defs.dat in PROJ.4 shared files
map(fr.prj)
rplot
## Warning in SpatialPolygons2map(database, namefield = namefield): database
## does not (uniquely) contain the field 'name'.

head(fr.prj@data)
##   ID_0 ISO NAME_0 ID_1    NAME_1  ID_2         NAME_2   VARNAME_2
## 0   76 FRA France  989    Alsace 13755       Bas-Rhin  Unterelsaá
## 1   76 FRA France  989    Alsace 13756      Haut-Rhin   Oberelsaá
## 2   76 FRA France  990 Aquitaine 13757       Dordogne        <NA>
## 3   76 FRA France  990 Aquitaine 13758        Gironde Bec-D'Ambes
## 4   76 FRA France  990 Aquitaine 13759         Landes      Landas
## 5   76 FRA France  990 Aquitaine 13760 Lot-Et-Garonne        <NA>
##   NL_NAME_2 HASC_2 CC_2      TYPE_2  ENGTYPE_2 VALIDFR_2 VALIDTO_2
## 0      <NA>  FR.BR <NA> Département Department  17900226   Unknown
## 1      <NA>  FR.HR <NA> Département Department  17900226   Unknown
## 2      <NA>  FR.DD <NA> Département Department  17900226   Unknown
## 3      <NA>  FR.GI <NA> Département Department  17900226   Unknown
## 4      <NA>  FR.LD <NA> Département Department  17900226   Unknown
## 5      <NA>  FR.LG <NA> Département Department  17900226   Unknown
##   REMARKS_2 Shape_Leng Shape_Area
## 0      <NA>   4.538735  0.5840273
## 1      <NA>   3.214178  0.4198797
## 2      <NA>   5.012795  1.0389622
## 3      <NA>   9.200047  1.1489822
## 4      <NA>   5.531231  1.0372815
## 5      <NA>   4.489830  0.6062017
# load or create data
set.seed(100)
myvar <- rnorm(1:96)
# manipulate data for the plot
france.geodata  <- data.frame(id=rownames(fr.prj@data), mapvariable=myvar)
head(france.geodata)
##   id mapvariable
## 1  0  1.12200636
## 2  1  0.05912043
## 3  2 -1.05873510
## 4  3 -1.31513865
## 5  4  0.32392954
## 6  5  0.09152878

Use ggmap

# fortify prepares the shape data for ggplot
france.dataframe <- fortify(fr.prj) # convert to data frame for ggplot
## Regions defined for each Polygons
head(france.dataframe)
##       long      lat order  hole piece id group
## 1 7.847912 49.04728     1 FALSE     1  0   0.1
## 2 7.844539 49.04495     2 FALSE     1  0   0.1
## 3 7.852439 49.04510     3 FALSE     1  0   0.1
## 4 7.854333 49.04419     4 FALSE     1  0   0.1
## 5 7.855955 49.04431     5 FALSE     1  0   0.1
## 6 7.856299 49.03776     6 FALSE     1  0   0.1
#now combine the values by id values in both dataframes
france.dat <- join(france.geodata, france.dataframe, by="id")
head(france.dat)
##   id mapvariable     long      lat order  hole piece group
## 1  0    1.122006 7.847912 49.04728     1 FALSE     1   0.1
## 2  0    1.122006 7.844539 49.04495     2 FALSE     1   0.1
## 3  0    1.122006 7.852439 49.04510     3 FALSE     1   0.1
## 4  0    1.122006 7.854333 49.04419     4 FALSE     1   0.1
## 5  0    1.122006 7.855955 49.04431     5 FALSE     1   0.1
## 6  0    1.122006 7.856299 49.03776     6 FALSE     1   0.1
# Plot 3
p <- ggplot(data=france.dat, aes(x=long, y=lat, group=group))
p <- p + geom_polygon(aes(fill=mapvariable)) +
       geom_path(color="white",size=0.1) +
       coord_equal() +
       scale_fill_gradient(low = "#ffffcc", high = "#ff4444") +
       labs(title="Our map",fill="My variable")
# plot the map
p

image-22-02-2017-at-12-11

Use plot basic

nclassint <- 5 #number of colors to be used in the palette
cat <- classIntervals(myvar, nclassint,style = "jenks") #style refers to how the breaks are created
colpal <- brewer.pal(nclassint,"RdBu")
color <- findColours(cat,rev(colpal)) #sequential
bins <- cat$brks
lb <- length(bins)
plot(fr.prj, col=color,border=T)
legend("bottomleft",fill=rev(colpal),legend=paste(round(bins[-length(bins)],1),":",round(bins[-1],1)),cex=1, bg="white")

image-22-02-2017-at-12-23-copy

Advertisements

A map of the US election results

  1. Upload libraries:
rm(list = ls(all=T)) #clear workspace
library(dplyr)
library(readr)
library(stringr)
library(tidyr)
library(readxl)
library(classInt)
library(RColorBrewer)
library(maptools) #to read shapefiles

2. Download the data files (note they are not ready for use but need some cleaning as there are more areas in the excel files than polygons in the shape file). I copy here the code as I have used it in my script but it’s available at RPubs thanks to David Robinson.

download.file("http://www2.census.gov/prod2/statcomp/usac/excel/LND01.xls", "LND01.xls")
download.file("http://www2.census.gov/prod2/statcomp/usac/excel/POP01.xls", "POP01.xls")

according to metadata, this is Land Area in 2010 and resident population in 2010:

us_county_area <- read_excel("LND01.xls") 
transmute(CountyCode = as.character(as.integer(STCOU)), Area = LND110210D)

us_county_population <- read_excel("POP01.xls") 
transmute(CountyCode = as.character(as.integer(STCOU)),Population = POP010210D)

3. Adjust data

election_url <- "https://raw.githubusercontent.com/Prooffreader/election_2016_data/master/data/presidential_general_election_2016_by_county.csv"
county_data <- read_csv(election_url) 
group_by(CountyCode = as.character(fips)) 
mutate(TotalVotes = sum(votes)) 
ungroup() 
mutate(name = str_replace(name, ".\\. ", "")) 
filter(name %in% c("Trump", "Clinton", "Johnson", "Stein")) 
transmute(County = str_replace(geo_name, " County", ""),
State = state,
CountyCode = as.character(fips),
Candidate = name,
Percent = vote_pct / 100,
TotalVotes) 
spread(Candidate, Percent, fill = 0) 
inner_join(us_county_population, by = "CountyCode") 
inner_join(us_county_area, by = "CountyCode")

you can save the data into a csv file:

# write_csv(county_data, "county_election_2016.csv")

You can download the cleaned datafile here: data_election_2016_by_county

4. Upload data and shape files

setwd("/Users/...")
dt <- read.csv("new_county_election_2016.csv", header=T)
us <- readShapePoly("./USA_adm/USA_adm2.shp")
us0 <- readShapePoly("./USA_adm/USA_adm0.shp")
us.m <- us[-c(which(us$NAME_1=="Alaska")),] #get rid of Alaska
us.d <- us.m[-c(67:71),]

5. Prepare the color palette(s)

nclassint <- 5 #number of colors to be used in the palette
cat.T <- classIntervals(dt$Trump[-c(67:71)], nclassint,style = "jenks") #style refers to how the breaks are created
colpal.T <- brewer.pal(nclassint,"Reds")
color.T <- findColours(cat.T,colpal.T) #sequential
bins.T <- cat.T$brks
lb.T <- length(bins.T)

5. Plot the maps with map basic

# pdf("Where are the trump voters.pdf")
# plot(us.d, col=color.T, border=F)
# plot(us0,add=T, lwd=0.1)
# legend("bottomleft",fill=colpal.T,legend=paste(round(bins[-length(bins.T)],1),":",round(bins.T[-1],1)),cex=1, bg="white")
# dev.off()
clinton-voters
% Votes for Clinton
where-are-the-trump-voters
% Votes for Trump

… or ggplot2

library(ggplot2)
library(scales)
theme_set(theme_bw())

ggplot(county_data, aes(Population / Area, Trump)) +
  geom_point() +
  geom_point(data=county_data[which(county_data$State=="Texas"),], aes(x=Population/Area, y=Trump), colour="red")+
  scale_x_log10() +
  scale_y_continuous(labels = percent_format()) +
  xlab("Population density (ppl / square mile)") +
  ylab("% of votes going to Trump") +
  geom_text(aes(label = County), vjust = 1, hjust = 1, check_overlap = TRUE) +
  geom_smooth(method = "lm") +
  ggtitle("Population density vs Trump voters by county (Texas Counties in red)")

This is the code to plot in red points according to State (in red) and to add red labels to those points. The check_overlap=T avoids overlapping labels.

# ggplot(county_data, aes(Population / Area, Trump)) +
#   geom_point() +
#   geom_point(data=county_data[which(county_data$State=="California"),], aes(x=Population/Area, y=Trump), colour="red")+
#   scale_x_log10() +
#   scale_y_continuous(labels = percent_format()) +
#   xlab("Population density (ppl / square mile)") +
#   ylab("% of votes going to Trump") +
#   geom_text(data=county_data[which(county_data$State=="California"),], aes(label = ifelse(Trump&amp;gt;.5, as.character(dt$County), "" )), color= "red",size=5,vjust = 1, hjust = 1, check_overlap = TRUE) +
#   geom_smooth(method = "lm") +
#   ggtitle("Population density vs Trump voters by county (California in red)")

rplot1geom_point_texas

californiaclinton

The marriage market in XIX century Spain. La soltería en España en el siglo XIX

Using data on single men (21-35 years old) and women (16-30 years old) you can map where the unbalances in the marriage market are caused by “excess” of male or female population

Using data on single men (21-35 years old) and women (16-30 years old) you can map where the unbalances in the marriage market are caused by “excess” of male or female population.

Untitled.png

A match made in R: checking the order of geographical areas in shape files and in your data frames

Not every shape file is as nice as those provided in libraries. Sometimes we have to deal with historical maps, which have been hand-drawn, re-touched and what not. To work with geo-referenced data it is essential to have a variable in both shape file and dataframe with unique coding that has exactly the same number of areas and the same ordering in both files.

A quick way to check if shapefile and dataframe have the same number of areas:

nrow(df) == length(shape.file$Code)

In the shapefile, one can also select a couple of areas big enough so that they can easily be located, and plot them as “control” areas.
For instance, I want to select the area with code “15078” in the shapefile:
>which(shape.file$Code=="15078",arr.Ind=T)
[1] 271

which is the area in the 271-th position (same way shape.file$Code[271] gives the code of area 271).
plot(shape.file)
plot(shape.file[c(271,898),],col="red",border="red",add=T)

this is an easy way to locate your “control” area(s).
Rplot
Ideally, you should have some variable that is identical to the one in the shapefile, a codification of some sort, providing a unique Code, the name of the area or some factors that allow you to locate the area in space.

An easy way to check if both shape file and data frame have the same ordering of geographical areas is to test it:
>code.sh <- cbind(c(1:length(shape.file$Code)),as.vector(shape.file$Code))
>code.df <- cbind(c(1:nrow(df)),df$Code)
>code.df==code
.sh
[,1]  [,2]
[1,] TRUE  TRUE
[2,] TRUE  TRUE
[3,] TRUE  TRUE

What if it’s not?
First option: the inelegant solution
Manually change the order of the areas in a csv file according to the exact order they have in the shape file. It’s easy as you can create an ordinal index for the shapefile codes, paste it in excel, and assign it with a vlookup function.
Second option: the smart R match
In R there is a function called match that returns a vector of the positions of first matches of the first argument in its second:
>my.match <- match(df$Code, shape.file$Code)
NB: to use match the two variables providing the code for the areas have to have the very same unique and identical codes, or else funny stuff happens. To check that everything is in its right place, you can plot the two “control” spatial polygons we chose in the beginning, using their position in the dataframe rather than in the shapefile:
>plot(shape.file)
>plot(shape.file[c(which(df$Code=="305"),which(df$Code=="15078")),],col="orange",add=T)

Game of Thrones maps in R…

The map of GOT world with rivers, roads, lakes, the Wall, and main cities:

GOT_map

Neighborhood relations according to Sphere of Influence pretty much coincide with roads and rivers (package spdep):

GOT_neigh

Paste some images to locate the (surviving) Stark family members, using rasterImage from the png library:

GOT_Stark

Mean Age at Childbearing in Spain 2011

TFR 2011 fixed

A ggmap of 2015 Israeli elections by city

IL_el_percThe recent Israeli elections are a reminder of how Demography and Space play a crucial role in the outcome of the 20th Knesset. For more insight, read the full Demotrends blog post by Ashira Menashe-Oren the demographics of the Israeli electorate here. The map has been done using ggmap and ggplot, two simple mapping tools I really like. If you are interested in the code, below you can find the relative syntax and data.

To start upload the libraries:

library(maptools) #reads the shape file

library(ggmap)

library(ggplot2)

Download the shape file (I normally use Diva-GIS website) and read it:

map.ogr<- readOGR(".","ISR_adm1")

Data set:

df <- structure(list(lon = c(35.148529, 35.303546, 34.753934, 34.781768,34.989571, 34.824785, 34.808871, 34.883879, 34.844675, 34.90761, 35.010397, 34.871326, 35.21371, 34.655314, 34.887762, 34.792501, 34.574252, 34.791462, 34.748019, 34.787384, 34.853196, 34.811272, 34.919652, 34.888075, 35.098051, 35.119773, 34.872938, 34.835226, 34.988099, 35.002462), lat = c(32.517127, 32.699635, 31.394548, 32.0853, 32.794046, 32.068424, 32.072176, 32.149961, 32.162413, 32.178195, 31.890267, 32.184781, 31.768319, 31.804381, 32.084041, 31.973001, 31.668789, 31.252973, 32.013186, 32.015833, 32.321458, 31.892773, 32.434046, 31.951014, 33.008536, 32.809144, 31.931566,32.084932, 31.747041, 31.90912), City = structure(c(30L, 19L,24L, 29L, 9L, 25L, 7L, 11L, 10L, 14L, 16L, 23L, 13L, 1L, 21L,28L, 2L, 4L, 3L, 12L, 20L, 27L, 8L, 15L, 18L, 22L, 26L, 6L, 5L, 17L), .Label = c("Ashdod", "Ashkelon", "Bat yam", "Beersheva",  "Beit  Shemesh", "Bnei brak", "Giv'atayim", "Hadera", "Haifa",  "Herzliyya", "Hod HaSharon", "Holon", "Jerusalem", "Kefar Sava",  "Lod", "Modi'in - Makkabbim - Re'ut", "Modi'in Illit", "Nahariyya", "Nazareth ", "Netanya", "Petach Tikva", "Qiryat Atta", "Ra'annana",  "Rahat", "Ramat gan", "Ramla", "Rehovot", "Rishon", "Tel-Aviv",  "Umm Al-Fahm"), class = "factor"), most.votes = c(96.28, 91.41,  87.62, 34.03, 24.98, 30.93, 40.1, 38.77, 34.2, 34.66, 28.95,  32.75, 23.9, 30.96, 27.87, 29.78, 39.31, 37.17, 32.88, 30.86,  33.14, 26.95, 31.77, 32.22, 34.25, 35.01, 39.1, 57.56, 27.89,  71.63), party = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,  2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L), .Label = c("joint list", "labour", "likud", "yahadut hatora"), class = "factor")), .Names = c("lon", "lat",  "City", "most.votes", "party"), class = "data.frame", row.names = c(NA,  -30L))

get the map using “get_map"

gmap <- get_map(location=c(34.2,29.4,36,33.5),zoom=7,source="stamen",maptype="watercolor")

and plot the map:

ggmap(gmap)+

geom_polygon(aes(x = long, y = lat, group=id), data = map.ogr, color ="blue", fill ="white", alpha = .8, size = .4)+

geom_point(aes(x=lon,y=lat,color=party,size=most.votes),data=df)+ scale_colour_discrete("Coalition", labels = c("Joint List", "Labour","Likud","United Torah Judaism"), breaks = c("joint list", "labour","likud","yahadut hatora")) + scale_size_continuous("Coalition", labels = c("Joint List", "Labour","Likud","United Torah Judaism"), breaks = c("joint list", "labour","likud","yahadut hatora"), range=c(10,15), guide = FALSE)+ theme(axis.text=element_text(size=18), plot.title=element_text(size=rel(3)), legend.key = element_rect(fill = "white"), legend.background =element_rect("white"), legend.text = element_text(size = 25), legend.title = element_text(size = 25))+ guides(colour = guide_legend(override.aes = list(size=8)))+ labs(x="",y="")

IL_el_perc_city_names_color If you want to add city names you can use the “annotate” option, adding the code below after guides(...)+. I have modified the coordinates to avoid overlapping of labels and colored names to match the color of the winner party.

annotate("text",x=c(35.14853+ 0.2,35.21371+0.15,35.00246+ 0.15,34.79146+0.15, 34.98957-0.08,34.78177-0.14), y=c(32.51713,31.76832,31.90912,31.25297, 32.79405,32.08530),size=5,font=3, label=c("Umm Al-Fahm","Jerusalem","Modin  Illit","Beersheva","Haifa","Tel Aviv"), color=c("darkred","blue4","deeppink4", "blue4","springgreen4","green4"))+

For beginners I highly recommend ggplot2 mailing list, a great and shame-free place to learn.