BAR CHART: a ggplot balance plot (2)

Merchandise trade balance plot in ggplot2

BAR CHART+LINE

Graph 2: Merchandise trade balance

You can find the data for this plot here or alternatively here is the dput data for balance:

structure(list(variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Merchandize Trade Balance", class = "factor"),
type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = "Balance", class = "factor"), year = c(2013L,
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2013L), value = c(-0.5, -1.5, -0.1, -0.4, -0.2,
0, 0.1, -0.1, -0.6, -0.2, -0.2, -1.3, 0), geo = structure(c(2L,
4L, 7L, 9L, 1L, 6L, 12L, 5L, 3L, 11L, 10L, 13L, 8L), .Label = c("CIS",
"Dev. Asia Pacific", "Eastern Asia", "Europe", "Latin Am. And Carr.",
"North Africa", "North America", "Oceania", "South Eastern Europe",
"South-Eastern Asia", "Southern Asia", "Sub-Saharan Africa",
"Western Asia"), class = "factor")), .Names = c("variable",
"type", "year", "value", "geo"), class = "data.frame", row.names = c(NA,
-13L))
library(dplyr)   # data manipulation verbs and the %>% pipe
library(ggplot2) # plotting

# Rows of the source table that belong to the merchandise trade balance.
mer.bal <- filter(mydt, variable == "Merchandize Trade Balance")

# Export/import flows: exports keep their sign, every other flow is negated
# so that the two sets of bars extend in opposite directions from zero.
base <- mer.bal %>%
  filter(type != "Balance") %>%
  mutate(value = ifelse(type == "Exports", value, -value))

# Net balance rows; these are drawn last as a thin bar on top of the flows.
balance <- filter(mer.bal, type == "Balance")

# One named palette shared by the fill and the outline colour scales.
trade_colours <- c(Exports = "#0072B2", Imports = "#56B4E9", Balance = "red")

# Bar layer for a single flow type ("Exports" or "Imports") taken from `base`.
flow_bars <- function(flow) {
  geom_bar(
    data = filter(base, type == flow),
    aes(col = type),
    stat = "identity"
  )
}

ggplot(balance, aes(x = geo, y = value, fill = factor(type))) +
  flow_bars("Exports") +
  flow_bars("Imports") +
  geom_bar(data = balance, aes(col = type), stat = "identity", width = .2) +
  ggtitle(expression(atop("Merchandise trade balance", atop(italic("(Bln US$ by MDG Regions in 2013)"), "")))) +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 8, color = "black"),
    axis.text.y = element_text(size = 8, color = "black"),
    legend.text = element_text(size = 10),
    plot.title = element_text(
      size = 20, face = "bold", colour = "black", vjust = -1
    )
  ) +
  scale_fill_manual(values = trade_colours, name = "") +
  scale_colour_manual(values = trade_colours, name = "") +
  coord_flip() + # regions run down the page, values along the horizontal axis
  labs(x = "", y = "")

graph3

BAR CHART + LINE: a ggplot balance plot (1)

You can download the session 9 files here (R-Ladies Tbilisi) and set your working directory with setwd("/Users/mydomain/myfolder/").

BAR CHART + LINE:

###Graph 1: Total services trade, by value

 # Load the packages with library(): unlike require(), it stops with an error
# when a package is missing instead of returning FALSE and carrying on.
library(ggplot2)
library(dplyr)

# Folder that holds the session files; the CSV is read relative to it.
mypath <- "/Users/StayPuftMarshmallowMan/Shandor Folder/"
setwd(mypath)

# stringsAsFactors = TRUE keeps the text columns as factors on R >= 4.0 as
# well (it was the default before 4.0), so levels(mydt$variable) still lists
# the available indicators instead of returning NULL.
mydt <- read.csv(
  "Georgia_Data_UN.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)

head(mydt)
##                                            variable     type year   value
## 1 GDP: Gross domestic product (million current US$) economic 2014 16530.0
## 2 GDP: Gross domestic product (million current US$) economic 2010 11638.0
## 3 GDP: Gross domestic product (million current US$) economic 2005  6411.0
## 4    GDP growth rate (annual %, const. 2005 prices) economic 2014     4.8
## 5    GDP growth rate (annual %, const. 2005 prices) economic 2010     6.2
## 6    GDP growth rate (annual %, const. 2005 prices) economic 2005     9.6
##   geo
## 1
## 2
## 3
## 4
## 5
## 6
levels(mydt$variable)
##  [1] "Agricultural production index (2004-2006=100)"
##  [2] "Balance (million US$)"
##  [3] "Balance of payments, current account (million US$)"
##  [4] "CO2 emission estimates (tons per capita)"
##  [5] "CPI: Consumer price index (2000=100)"
##  [6] "Economy: Agriculture (% of GVA)"
##  [7] "Economy: Industry (% of GVA)"
##  [8] "Economy: Services and other activity (% of GVA)"
##  [9] "Education: Government expenditure (% of GDP)"
## [10] "Education: Tertiary gross enrolment ratio (f-m per 100 pop.)"
[...]
## [48] "Unemployment (% of labour force)"
## [49] "Urban population (%)"
## [50] "Urban population growth rate (average annual %)"
# Keep only the services-trade rows of the source table.
ser.dt <- mydt %>%
  filter(variable == "Total Services Trade")

# Yearly balance = exports minus imports. Selecting the flows by their `type`
# label (the same labels the plotting code relies on) makes the result
# independent of the order in which the rows appear within a year.
Balance <- ser.dt %>%
  group_by(year) %>%
  summarise(
    value = sum(value[type == "Exports"]) - sum(value[type == "Imports"])
  ) %>%
  ungroup()

# Add the descriptive columns so the balance rows line up with `ser.dt`.
# The number of rows is taken from the data instead of being hard-coded, so
# the block keeps working when the number of years changes.
n_years <- nrow(Balance)
Balance <- cbind(
  variable = rep("Total Services Trade", n_years),
  type = rep("Balance", n_years),
  Balance,
  geo = rep("NA", n_years)
)

# Flows and balance stacked in a single long table.
mydata <- rbind(ser.dt, Balance)

subset with the pipe operator %>%

# Flows for the bars: exports keep their sign, the other flow is negated so
# it is drawn below zero.
base <- mydata %>%
  filter(type != "Balance") %>%
  mutate(value = ifelse(type == "Exports", value, -value))

# Balance rows, drawn as points joined by a line on top of the bars.
balance <- mydata %>% filter(type == "Balance")

# Palettes: one for the bar fills, one for the balance line and points.
flow_fills <- c(Exports = "#D55E00", Imports = "#E69F00")
balance_colour <- c(Balance = "#660000")

ggplot(balance, aes(x = year, y = value)) +
  geom_bar(data = base, aes(fill = type), stat = "identity") +
  geom_point(aes(colour = type)) +
  # group = 1 joins all years into a single line
  geom_line(aes(colour = type, group = 1)) +
  scale_fill_manual(values = flow_fills, name = "") +
  scale_colour_manual(values = balance_colour, name = "") +
  labs(x = "", y = "Total Services Trade") +
  theme_bw()

Presentation1

DONUT CHART in ggplot2

 DONUT CHART

I personally don’t like pie charts that much, I prefer donut charts, they take up less space and the center can be used for extra annotations. In ggplot2 to get the “Donut” you design a bar chart (geom_bar) and then just bend it (coord_polar) at the extremities to get a donut.

To reproduce the chart below, you can download the data from the RLadies Tbilisi github webpage, Session 9 on Plotting.

Alternatively here’s the dput(-ted) data:

structure(list(X = 1:3, variable = structure(c(1L, 1L, 1L), .Label = "Export of Services", class = "factor"), type = structure(c(3L, 2L, 1L), .Label = c("Remaining", "Transportation", "Travel"), class = "factor"), year = c(2012L, 2012L, 2012L ), value = c(55.5, 33.4, 11.1), geo = c(NA, NA, NA), pos = c(27.75, 72.2, 94.45)), .Names = c("X", "variable", "type", "year", "value", "geo", "pos"), class = "data.frame", row.names = c(NA, -3L))

Exports of services by EBOPS category

#set the working directory
setwd("/Users/DrVenkman/The Gatekeepers Folder/")

# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later with a less obvious message.
library(dplyr) #data manipulation
library(tidyr) #data manipulation, wide to long format
library(ggplot2) #ggplot package for plotting

# Rows describing the export of services, one per category.
exp.ser <- mydt %>%
  filter(variable == "Export of Services")

# Label position for each category: the midpoint of its slice, measured along
# the running total within the year. The grouping is dropped afterwards so
# later steps do not silently operate per year.
exp.ser <- exp.ser %>%
  group_by(year) %>%
  mutate(pos = cumsum(value) - value / 2) %>%
  ungroup()

# A single stacked bar at x = 2; the x limits leave room on the inside so
# that bending the bar with coord_polar() gives a ring with an empty centre.
# NOTE(review): the title says 2013 while the dput sample in the text has
# year 2012 - confirm which year is intended.
p <- ggplot(exp.ser, aes(x = 2, y = value, fill = type)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = value, y = pos), size = 10, fontface = "bold") +
  xlim(0.5, 2.5) +
  coord_polar(theta = "y") +
  labs(x = NULL, y = NULL) +
  labs(fill = "") +
  scale_fill_manual(
    values = c(
      Remaining = "blue",
      Transportation = "#E69F00",
      Travel = "#D55E00"
    ),
    name = ""
  ) +
  ggtitle("Exports of services by EBOPS category, 2013") +
  theme_bw() +
  # strip axes, grid and border: only the ring, labels and legend remain
  theme(
    plot.title = element_text(face = "bold", family = c("sans"), size = 15),
    legend.text = element_text(size = 10),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    panel.border = element_blank()
  )

p

graph2

 giphy

High Resolution Mapping of Fertility and Mortality from Household Survey Data in Low Income Settings – PAA presentation

I will present my WorldPop mapping of demographic indicators in low-income settings at PAA in Chicago, in the session “Advances in Mathematical, Spatial, and Small-Area Demography”: Thursday, April 27, 2017, 10:15 AM – 11:45 AM, Hilton, Joliet Room.

Violin plots in ggplot2

Use geom_violin() to quickly plot a visual summary of variables, using the Boston dataset, MASS library.

Use geom_violin() to quickly plot a visual summary of variables, using the Boston dataset from the MASS library.

1. Upload the relevant libraries:

require(tidyr)
require(ggplot2)
require(RColorBrewer)
require(randomcoloR)
require(MASS)

2. Load data and use the tidyr package to transform wide into long format:

# Bring the Boston data set into the workspace.
data("Boston")

# Wide to long: every column from crim through medv is stacked into a
# `variable` (column name) / `value` (cell content) pair.
dt.long <- gather(
  data = Boston,
  key = "variable",
  value = "value",
  crim:medv
)

3. Create some color palettes:

# Red-to-blue ramp sampled at 14 evenly spaced colours.
red_blue_ramp <- colorRampPalette(c("red", "blue"))
col <- red_blue_ramp(14)
# col.bp <- brewer.pal(9, "Set1") # brewer.pal only has a max of 9 colors
# 14 colours from randomcoloR, stored as a plain vector.
col.rc <- as.vector(distinctColorPalette(14))

4. Plot(s):

  • With the standard colors produced by ggplot2:
# One violin per variable with a narrow, semi-transparent boxplot on top;
# each variable gets its own panel with free scales. Fill colours are the
# ggplot2 defaults and the legend title is removed.
ggplot(dt.long, aes(x = factor(variable), y = value)) +
  geom_violin(aes(fill = factor(variable))) +
  geom_boxplot(width = .1, color = "black", alpha = 0.3) +
  facet_wrap(~variable, scales = "free") +
  labs(x = "", y = "") +
  theme_bw() +
  theme(legend.title = element_blank())

violin-ggplot-color

  • With the color palette produced by colorRampPalette:
# Same violin/boxplot panels, filled with the colours stored in `col`.
ggplot(dt.long, aes(x = factor(variable), y = value)) +
  geom_violin(aes(fill = factor(variable))) +
  geom_boxplot(width = .1, color = "black", alpha = 0.3) +
  facet_wrap(~variable, scales = "free") +
  scale_fill_manual(values = col, name = "") +
  labs(x = "", y = "") +
  theme_bw()

violin-auto-color

  • With the color palette produced by randomcoloR library:
# Same violin/boxplot panels, filled with the colours stored in `col.rc`.
ggplot(dt.long, aes(x = factor(variable), y = value)) +
  geom_violin(aes(fill = factor(variable))) +
  geom_boxplot(width = .1, color = "black", alpha = 0.3) +
  facet_wrap(~variable, scales = "free") +
  scale_fill_manual(values = col.rc, name = "") +
  labs(x = "", y = "") +
  theme_bw()

violin-rc-color

Plot maps with base mapping tools and ggmap in R

Plot maps with ‘base’ mapping tools in R

Understanding what kind of data you have (polygons or points?) and what you want to map is pivotal to start your mapping.

  1. First you need a shapefile of the area you want to plot, such as metropolitan France. There are various sources for these: DIVA-GIS and EUROSTAT are the ones I use the most. It’s always important to have a .prj file included, as your final map ‘should’ be projected. I say “should” because sometimes it is just not possible, especially if you work with historical maps.
  2. Upload libraries

Load and prepare data

# Work from the folder that contains the shapefile.
# NOTE(review): no library() calls are visible in this section; readOGR()
# and map() presumably come from rgdal and maps - confirm and load them first.
setwd(mypath)

# Read the FRA_adm2 layer from the current directory.
fr.prj <- readOGR(dsn = ".", layer = "FRA_adm2")
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "FRA_adm2"
## with 96 features
## It has 18 fields
## NOTE: rgdal::checkCRSArgs: no proj_defs.dat in PROJ.4 shared files

# Quick outline plot of the polygons.
map(database = fr.prj)
rplot
## Warning in SpatialPolygons2map(database, namefield = namefield): database
## does not (uniquely) contain the field 'name'.

head(fr.prj@data)
##   ID_0 ISO NAME_0 ID_1    NAME_1  ID_2         NAME_2   VARNAME_2
## 0   76 FRA France  989    Alsace 13755       Bas-Rhin  Unterelsaá
## 1   76 FRA France  989    Alsace 13756      Haut-Rhin   Oberelsaá
## 2   76 FRA France  990 Aquitaine 13757       Dordogne        <NA>
## 3   76 FRA France  990 Aquitaine 13758        Gironde Bec-D'Ambes
## 4   76 FRA France  990 Aquitaine 13759         Landes      Landas
## 5   76 FRA France  990 Aquitaine 13760 Lot-Et-Garonne        <NA>
##   NL_NAME_2 HASC_2 CC_2      TYPE_2  ENGTYPE_2 VALIDFR_2 VALIDTO_2
## 0      <NA>  FR.BR <NA> Département Department  17900226   Unknown
## 1      <NA>  FR.HR <NA> Département Department  17900226   Unknown
## 2      <NA>  FR.DD <NA> Département Department  17900226   Unknown
## 3      <NA>  FR.GI <NA> Département Department  17900226   Unknown
## 4      <NA>  FR.LD <NA> Département Department  17900226   Unknown
## 5      <NA>  FR.LG <NA> Département Department  17900226   Unknown
##   REMARKS_2 Shape_Leng Shape_Area
## 0      <NA>   4.538735  0.5840273
## 1      <NA>   3.214178  0.4198797
## 2      <NA>   5.012795  1.0389622
## 3      <NA>   9.200047  1.1489822
## 4      <NA>   5.531231  1.0372815
## 5      <NA>   4.489830  0.6062017
# load or create data
set.seed(100)
# One simulated value per polygon. The count is read from the shapefile's
# attribute table instead of being hard-coded, so the vector always matches
# the number of features (rnorm(1:96) only used the length of 1:96 anyway).
myvar <- rnorm(nrow(fr.prj@data))
# manipulate data for the plot
# `id` carries the row names of the attribute table so the values can later
# be matched to the polygons by id.
france.geodata <- data.frame(id = rownames(fr.prj@data), mapvariable = myvar)
head(france.geodata)
##   id mapvariable
## 1  0  1.12200636
## 2  1  0.05912043
## 3  2 -1.05873510
## 4  3 -1.31513865
## 5  4  0.32392954
## 6  5  0.09152878

Use ggmap

# fortify() turns the spatial object into a plain data frame that ggplot
# can draw (see the long/lat/order/hole/piece/id/group columns below).
france.dataframe <- fortify(model = fr.prj)
## Regions defined for each Polygons
head(france.dataframe)
##       long      lat order  hole piece id group
## 1 7.847912 49.04728     1 FALSE     1  0   0.1
## 2 7.844539 49.04495     2 FALSE     1  0   0.1
## 3 7.852439 49.04510     3 FALSE     1  0   0.1
## 4 7.854333 49.04419     4 FALSE     1  0   0.1
## 5 7.855955 49.04431     5 FALSE     1  0   0.1
## 6 7.856299 49.03776     6 FALSE     1  0   0.1
# Attach the polygon coordinates to the per-region values, matching the two
# data frames on their shared `id` column.
france.dat <- join(x = france.geodata, y = france.dataframe, by = "id")
head(france.dat)
##   id mapvariable     long      lat order  hole piece group
## 1  0    1.122006 7.847912 49.04728     1 FALSE     1   0.1
## 2  0    1.122006 7.844539 49.04495     2 FALSE     1   0.1
## 3  0    1.122006 7.852439 49.04510     3 FALSE     1   0.1
## 4  0    1.122006 7.854333 49.04419     4 FALSE     1   0.1
## 5  0    1.122006 7.855955 49.04431     5 FALSE     1   0.1
## 6  0    1.122006 7.856299 49.03776     6 FALSE     1   0.1
# Plot 3
# Polygons filled by `mapvariable`, thin white outlines drawn on top, equal
# scaling on both axes and a light-yellow to red gradient.
p <- ggplot(data = france.dat, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = mapvariable)) +
  geom_path(color = "white", size = 0.1) +
  coord_equal() +
  scale_fill_gradient(low = "#ffffcc", high = "#ff4444") +
  labs(title = "Our map", fill = "My variable")
# plot the map
p

image-22-02-2017-at-12-11

Use plot basic

nclassint <- 5 #number of colors to be used in the palette
# NOTE(review): `cat` shadows the name of base::cat() for the rest of the
# script; the name is kept here because later code may refer to it.
cat <- classIntervals(myvar, nclassint, style = "jenks") #style refers to how the breaks are created
colpal <- brewer.pal(nclassint, "RdBu")
# The palette is reversed before being assigned to the classes; the legend
# below uses the same reversed order so swatches and labels agree.
color <- findColours(cat, rev(colpal)) #sequential
bins <- cat$brks
lb <- length(bins)
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
plot(fr.prj, col = color, border = TRUE)
# One legend entry per class, labelled "lower : upper" with rounded breaks.
legend(
  "bottomleft",
  fill = rev(colpal),
  legend = paste(round(bins[-length(bins)], 1), ":", round(bins[-1], 1)),
  cex = 1,
  bg = "white"
)

image-22-02-2017-at-12-23-copy

Find color breaks for mapping (fast)

I’ve stumbled upon a little trick to compute jenks breaks faster than with the classInt package, just be sure to use n+1 instead of n as the breaks are computed a little bit differently. That is to say, if you want 5 breaks, n=6, no biggie there.

For more on the Bayesian Analysis of Macroevolutionary Mixtures see BAMMtools library

# Install BAMMtools only when it is not available yet, so re-running the
# script does not download and reinstall the package every time.
if (!requireNamespace("BAMMtools", quietly = TRUE)) {
  install.packages("BAMMtools")
}
library(BAMMtools)
# Time the Jenks breaks computation; 6 is passed to obtain 5 classes, as
# explained in the text above.
system.time(getJenksBreaks(mydata$myvar, 6))
> user system elapsed
> 0.970 0.001 0.971

On the other hand this takes way more time with large datasets
library(classInt)
# Time the same Jenks classification (5 classes) with classInt for comparison.
system.time(
  classIntervals(var = mydata$myvar, n = 5, style = "jenks")
)
> Timing stopped at: 1081.894 1.345 1083.511