Population Pyramids of Georgia in ggplot2

You can download session 9 files for constructing the population pyramids of Georgia here: RLadies Tbilisi 

rm(list=ls(all=TRUE))
cat("\014")
mypath <- "/Users/GozerTheGozerian/Keymasters Folder/"
setwd(paste(mypath)) #set your working directory
##  [1] "Index"
##  [2] "Variant"
##  [3] "Major.area..region..country.or.area.."
##  [4] "sex"
##  [5] "Notes"
##  [6] "Country.code"
##  [7] "Reference.date..as.of.1.July."
##  [8] "X0.4"
##  [9] "X05.Sep"
## [10] "Oct.14"
## [11] "X15.19"
[...]
## [23] "X75.79"
## [24] "X80."
## [25] "X80.84"
## [26] "X85.89"
## [27] "X90.94"
## [28] "X95.99"
## [29] "X100."
head(pyr)
##   Index   Variant Major.area..region..country.or.area..  sex Notes
## 1     1 Estimates                                 WORLD both
## 2     2 Estimates                                 WORLD both
## 3     3 Estimates                                 WORLD both
## 4     4 Estimates                                 WORLD both
## 5     5 Estimates                                 WORLD both
## 6     6 Estimates                                 WORLD both
##   Country.code Reference.date..as.of.1.July.   X0.4 X05.Sep Oct.14 X15.19
## 1          900                          1950 337432  269550 260286 238628
## 2          900                          1955 402845  315055 263266 254815
## 3          900                          1960 430565  380319 309276 257899
## 4          900                          1965 477798  409020 372817 303891
## 5          900                          1970 522641  458298 403911 367789
## 6          900                          1975 543225  503753 452706 398384
##   X20.24 X25.29 X30.34 X35.39 X40.44 X45.49 X50.54 X55.59 X60.64 X65.69
## 1 221781 194424 166937 162917 147483 127415 107608  88601  73422  55106
## 2 231892 214878 187941 160385 155546 138743 119084  97441  76843  59322
## 3 248413 225957 208747 181632 153398 147699 130210 108435  85064  62665
## 4 251897 242692 219978 202499 174884 145701 138601 119505  95085  70256
## 5 297557 246921 237657 214330 196585 168438 137799 128954 107201  81023
## 6 361883 293531 243384 233137 209181 190299 161058 128755 116704  92385
##   X70.74 X75.79  X80. X80.84 X85.89 X90.94 X95.99 X100.
## 1  37360  21997 14202     NA     NA     NA     NA    NA
## 2  40346  23755 16158     NA     NA     NA     NA    NA
## 3  44018  25986 18061     NA     NA     NA     NA    NA
## 4  47382  29457 21032     NA     NA     NA     NA    NA
## 5  55168  32876 25340     NA     NA     NA     NA    NA
## 6  64337  38934 29743     NA     NA     NA     NA    NA
pyr <- read.csv("Session_2_POPULATION_BY_AGE_BOTH_SEXES.csv", header=T)
names(pyr)
##  [1] "Index"
##  [2] "Variant"
##  [3] "Major.area..region..country.or.area.."
##  [4] "sex"
##  [5] "Notes"
##  [6] "Country.code"
##  [7] "Reference.date..as.of.1.July."
##  [8] "X0.4"
##  [9] "X05.Sep"
## [10] "Oct.14"
## [11] "X15.19"
[...]
## [22] "X70.74"
## [23] "X75.79"
## [24] "X80."
## [25] "X80.84"
## [26] "X85.89"
## [27] "X90.94"
## [28] "X95.99"
## [29] "X100."
#make a new variable with names of all variables:
#make a new variable with names of all variables:
vars <- names(pyr)
#and change those variables names that start with an X
age <- c(paste(seq(0, 75, by=5), "-", seq(4, 79, by=5)), "80+", paste(seq(80, 95, by=5), "-", seq(84, 99, by=5)), "100+")
age
##  [1] "0 - 4"   "5 - 9"   "10 - 14" "15 - 19" "20 - 24" "25 - 29" "30 - 34"
##  [8] "35 - 39" "40 - 44" "45 - 49" "50 - 54" "55 - 59" "60 - 64" "65 - 69"
## [15] "70 - 74" "75 - 79" "80+"     "80 - 84" "85 - 89" "90 - 94" "95 - 99"
## [22] "100+"
names(pyr) <- c(vars[1], vars[2], "Major.Area", "sex", vars[5], vars[6], "year", age)
names(pyr)[1:15]
##  [1] "Index"        "Variant"      "Major.Area"   "sex"
##  [5] "Notes"        "Country.code" "year"         "0 - 4"
##  [9] "5 - 9"        "10 - 14"      "15 - 19"      "20 - 24"
## [13] "25 - 29"      "30 - 34"      "35 - 39"
library(tidyr)
# transform the data from wide to long format
pyr <- gather(pyr, "age.group", "value", 8:29)
head(pyr)
##   Index   Variant Major.Area  sex Notes Country.code year age.group  value
## 1     1 Estimates      WORLD both                900 1950     0 - 4 337432
## 2     2 Estimates      WORLD both                900 1955     0 - 4 402845
## 3     3 Estimates      WORLD both                900 1960     0 - 4 430565
## 4     4 Estimates      WORLD both                900 1965     0 - 4 477798
## 5     5 Estimates      WORLD both                900 1970     0 - 4 522641
## 6     6 Estimates      WORLD both                900 1975     0 - 4 543225
#replace all NA with 0
library(dplyr)
is.na(pyr$value) <- 0
pyr.g <- pyr %>%
 filter(Major.Area=="Georgia"&sex!="both") # exclude "both"

#create an order vector to sort data
o <- seq(1,22, by=1) # 22 is the number of age groups length(unique(pyr$age.group))
oo <- rep(o,28) # 28 number of years
order <- as.vector(sort(oo, decreasing=F))
pyr.g$order <- order
breaks <- pyr.g$age.group
library(ggplot2)
###
# get rid of the 80+ abridged age group
pyr.g1 <- pyr.g[-c(which(pyr.g$age.group=="80+")),]
### simple pyramid plot
p <- ggplot(pyr.g1, aes(x=age.group, y=value, fill=factor(sex)))+
geom_bar(data=pyr.g1 %>%
filter(sex=="female"&year=="2015"),
aes(x=reorder(age.group, order), y=value), stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="male"&year=="2015"),
aes(x=reorder(age.group, order), y=-value), stat="identity")+ #negative value for males not to overlap; reorder values of age group by order; "identity" is only for bar charts
coord_flip()+ #bending function: flip the coordinates
labs(x = "", y = "")+
scale_fill_manual(values = c(female = "red", male = "blue"), name="")+
scale_x_discrete(breaks=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+"),labels=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+" ))+ #not to show all the age groups all the time
scale_y_continuous(breaks=seq(-200,200,25),labels=abs(seq(-200,200,25)))+ #tell R t paste absolute numbers of values not to have negative values on graph
theme_bw()+
theme(axis.text.x = element_text(size=10, color="black"), # size of x axis text
axis.text.y = element_text(size=10, color="black"))

pyr1

#############################################
### STEP 2: add lines/bars to compare other years
#############################################
p+
geom_line(data=pyr.g1 %>%
filter(sex=="male"&year=="1975"),
aes(x=reorder(age.group, order), y=-value), colour="lightblue", group=1)+
geom_line(data=pyr.g1 %>%
filter(sex=="female"&year=="1975"),
aes(x=reorder(age.group, order), y=value), colour="pink", group=1)

pyr2

# bars: since in ggplot the last plot is
#the one that appears on top (hiding everything underneath),
#we can add alpha=0.5 to add some transparence, 1 being the
#full color
p+
geom_bar(data=pyr.g1 %>%
filter(sex=="male"&year=="1975"),
aes(x=reorder(age.group, order), y=-value), fill="lightblue", alpha=.5,stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="female"&year=="1975"),
aes(x=reorder(age.group, order), y=value), fill="pink", alpha=.5, stat="identity")

pyr3

#######################################################################
# STEP 3: add different legends for the two years: now we only have one for the sex, as the fill factors for all 4 geom_bar(s) is the same
#
ggplot(pyr.g1, aes(x=age.group, y=value, fill=factor(sex), col=factor(year)))+ # add different colors for the two years 1975 and 2015 by adding col=factor(year)
# this part stays the same
geom_bar(data=pyr.g1 %>%
filter(sex=="female"&year=="2015"),
aes(x=reorder(age.group, order), y=value), stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="male"&year=="2015"),
aes(x=reorder(age.group, order), y=-value), stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="male"&year=="1975"),
aes(x=reorder(age.group, order),y=-value), alpha=.5,stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="female"&year=="1975"),
aes(x=reorder(age.group, order), y=value), alpha=.5, stat="identity")+
coord_flip()+
labs(x = "", y = "")+
scale_x_discrete(breaks=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)) , "100+"),labels=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+" ))+
scale_y_continuous(breaks=seq(-200,200,25),labels=abs(seq(-200,200,25)))+
theme_bw()+
theme(axis.text.x = element_text(size=10, color="black"),
axis.text.y = element_text(size=10, color="black"))+
# add the legends with scale_fill_manual which controls the filling colors for sex and scale_color_manual which controls the border color that distinguisces the two years
scale_fill_manual(values = c(female = "red", male = "blue"), name="")+
scale_color_manual(values=c("1975"="black", "2015"="grey"), name="" )+
# and I want the year legend squares to look empty
guides(colour = guide_legend(override.aes = list(alpha = 0))) #makes the squares for the years legend empty of any color

pyr4

################################################################
## STEP 4: one pyramid plot for each year in one page with facet_wrap
##
ggplot(pyr.g1, aes(x=age.group, y=value, fill=factor(sex)))+
geom_bar(data=pyr.g1 %>%
filter(sex=="male"),
aes(x=reorder(age.group, order), y=-value), stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="female"),
aes(x=reorder(age.group, order), y=value), stat="identity")+
coord_flip()+
labs(x = "", y = "")+
scale_x_discrete(breaks=c( paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+" ))+
scale_y_continuous(breaks=seq(-300,300,100),labels=abs(seq(-300,300,100)))+
scale_fill_manual(values = c(female = "red", male = "blue"), name="")+
theme_bw()+
theme(axis.text.x = element_text(size=10, color="black"),
axis.text.y = element_text(size=10, color="black"))+
facet_wrap(~year)

Untitled

pyr.ar <- pyr %>%
filter(Major.Area=="Armenia"&sex!="both") # exclude "both"
pyr.az <- pyr %>%
filter(Major.Area=="Azerbaijan"&sex!="both") # exclude "both"
pyr.ar$order <- order
pyr.az$order <- order
pyr.c <- rbind(pyr.g, pyr.ar, pyr.az)
pyr.c1 <- pyr.c[-c(which(pyr.c$age.group=="80+")),] 

ggplot(pyr.c1,
aes(x=age.group, y=value,
fill=factor(Major.Area)))+
 geom_bar(data=pyr.c1 %>%
filter(sex=="female"&year=="2015"),
aes(x=reorder(age.group, order), y=value), stat="identity")+
geom_bar(data=pyr.c1 %>%
filter(sex=="male"&year=="2015"),
aes(x=reorder(age.group, order), y=-value),
stat="identity")+
coord_flip()+
 labs(x = "", y = "")+
scale_x_discrete(breaks=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)) , "100+"),
labels=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+" ))+
 scale_y_continuous(breaks=seq(-400,400,200),
labels=abs(seq(-400,400,200)))+
 theme_bw()+
scale_fill_manual(values = c(Armenia = "red",
Georgia="green", Azerbaijan = "blue"), name="")+
theme(axis.text.x = element_text(size=10, color="black"),
axis.text.y = element_text(size=10, color="black"),
legend.position="none")+
facet_wrap(~Major.Area)
#facet_wrap(~Major.Area, scales="free_x")

Untitled3

And with scales=”free_x”

Untitled1


Advertisements

BAR CHART: a ggplot balance plot (2)

Merchandise trade balance plot in ggplot2

BAR CHART+LINE

Graph 2: Merchandise trade balance

You can find the data for this plot here or alternatively here is the dput data for balance:

structure(list(variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Merchandize Trade Balance", class = "factor"),
type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = "Balance", class = "factor"), year = c(2013L,
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2013L), value = c(-0.5, -1.5, -0.1, -0.4, -0.2,
0, 0.1, -0.1, -0.6, -0.2, -0.2, -1.3, 0), geo = structure(c(2L,
4L, 7L, 9L, 1L, 6L, 12L, 5L, 3L, 11L, 10L, 13L, 8L), .Label = c("CIS",
"Dev. Asia Pacific", "Eastern Asia", "Europe", "Latin Am. And Carr.",
"North Africa", "North America", "Oceania", "South Eastern Europe",
"South-Eastern Asia", "Southern Asia", "Sub-Saharan Africa",
"Western Asia"), class = "factor")), .Names = c("variable",
"type", "year", "value", "geo"), class = "data.frame", row.names = c(NA,
-13L))
library(dplyr) #to manipulate the dataset
library(ggplot2) #plotting
mer.bal <- mydt %>%
filter(variable == "Merchandize Trade Balance")

base <- mer.bal %>%
filter(type != "Balance") %>%
mutate(
value = ifelse(type == "Exports", value, -value)
)
balance <- mer.bal %>%
filter(type == "Balance")

ggplot(balance, aes(x = geo, y = value, fill=factor(type))) +
geom_bar(data = base %>%
filter(type=="Exports"), aes(col=type), stat = "identity") +
geom_bar(data = base %>%
filter(type=="Imports"), aes(col=type), stat = "identity") +
geom_bar(data = balance, aes(col=type), stat = "identity", width=.2) +
ggtitle(expression(atop("Merchandise trade balance", atop(italic("(Bln US$ by MDG Regions in 2013)"), "")))) +
theme_bw()+
theme(axis.text.x = element_text(size=8, color="black"),
axis.text.y = element_text(size=8, color="black"),
legend.text=element_text(size=10),
plot.title = element_text(size = 20, face = "bold", colour = "black", vjust = -1))+
scale_fill_manual(values = c(Exports = "#0072B2", Imports = "#56B4E9", Balance="red"), name="") +
scale_colour_manual(values = c(Exports = "#0072B2", Imports = "#56B4E9", Balance="red"), name="") +
coord_flip()+
labs(x = "", y = "")

graph3

BAR CHART + LINE: a ggplot balance plot (1)

You can download session 9 files here (R-Ladies Tbilisi) and specify your working directory with setwd(“/Users/mydomain/myforlder/)

BAR CHART + LINE:

###Graph 1: Total services trade, by value

 require(ggplot2)
require(dplyr)
mypath <- "/Users/StayPuftMarshmallowMan/Shandor Folder/"
setwd(paste(mypath))
mydt <- read.csv("Georgia_Data_UN.csv", header=T)

head(mydt)
##                                            variable     type year   value
## 1 GDP: Gross domestic product (million current US$) economic 2014 16530.0
## 2 GDP: Gross domestic product (million current US$) economic 2010 11638.0
## 3 GDP: Gross domestic product (million current US$) economic 2005  6411.0
## 4    GDP growth rate (annual %, const. 2005 prices) economic 2014     4.8
## 5    GDP growth rate (annual %, const. 2005 prices) economic 2010     6.2
## 6    GDP growth rate (annual %, const. 2005 prices) economic 2005     9.6
##   geo
## 1
## 2
## 3
## 4
## 5
## 6
levels(mydt$variable)
##  [1] "Agricultural production index (2004-2006=100)"
##  [2] "Balance (million US$)"
##  [3] "Balance of payments, current account (million US$)"
##  [4] "CO2 emission estimates (tons per capita)"
##  [5] "CPI: Consumer price index (2000=100)"
##  [6] "Economy: Agriculture (% of GVA)"
##  [7] "Economy: Industry (% of GVA)"
##  [8] "Economy: Services and other activity (% of GVA)"
##  [9] "Education: Government expenditure (% of GDP)"
## [10] "Education: Tertiary gross enrolment ratio (f-m per 100 pop.)"
[...]
## [48] "Unemployment (% of labour force)"
## [49] "Urban population (%)"
## [50] "Urban population growth rate (average annual %)"
ser.dt <- mydt %>%
filter(variable=="Total Services Trade")

Balance <- ser.dt%>%
group_by(year)%>%
summarise(value=-diff(value))

Balance <- cbind(variable=c(rep("Total Services Trade", 13)),
type= c(rep("Balance", 13)), Balance, geo=c(rep("NA", 13)))

mydata <- rbind(ser.dt, Balance)

subset with the pipe operator %>%

base <- mydata %>%
filter(type != "Balance") %>%
mutate(
value = ifelse(type == "Exports", value, -value)
)
balance <- mydata %>%
filter(type == "Balance")

ggplot(balance, aes(x = year, y = value)) +
geom_bar(data = base, aes(fill = type), stat = "identity") +
geom_point(aes(colour = type)) +
geom_line(aes(colour = type, group=1)) +
scale_fill_manual(values = c(Exports = "#D55E00", Imports = "#E69F00"), name="") +
scale_colour_manual(values = c(Balance = "#660000"), name="") +
labs(x = "", y = "Total Services Trade")+
theme_bw()

Presentation1

DONUT CHART in ggplot2

 DONUT CHART

I personally don’t like pie charts that much, I prefer donut charts, they take up less space and the center can be used for extra annotations. In ggplot2 to get the “Donut” you design a bar chart (geom_bar) and then just bend it (coord_polar) at the extremities to get a donut.

To reproduce the chart below, you can download the data from the RLadies Tbilisi github webpage, Session 9 on Plotting.

Alternatively here’s the dput(-ted) data:

structure(list(X = 1:3, variable = structure(c(1L, 1L, 1L), .Label = "Export of Services", class = "factor"), type = structure(c(3L, 2L, 1L), .Label = c("Remaining", "Transportation", "Travel"), class = "factor"), year = c(2012L, 2012L, 2012L ), value = c(55.5, 33.4, 11.1), geo = c(NA, NA, NA), pos = c(27.75, 72.2, 94.45)), .Names = c("X", "variable", "type", "year", "value", "geo", "pos"), class = "data.frame", row.names = c(NA, -3L))

Exports of services by EBOPS category

#set the working directory
setwd("/Users/DrVenkman/The Gatekeepers Folder/")

require(dplyr) #data manipulation
require(tidyr) #data manipulation, wide to long format
require(ggplot2) #ggplot package for plotting

exp.ser <- mydt %>%
filter(variable == "Export of Services")

exp.ser <- exp.ser %>% group_by(year) %>% mutate(pos = cumsum(value)- value/2)

p <- ggplot(exp.ser, aes(x=2, y=value, fill=type))+
geom_bar(stat="identity")+
geom_text( aes(label = value, y=pos), size=10, fontface="bold")+
xlim(0.5, 2.5) +
coord_polar(theta = "y")+
labs(x=NULL, y=NULL)+
labs(fill="") +
scale_fill_manual(values = c(Remaining = "blue", Transportation = "#E69F00", Travel= "#D55E00"), name="")+
ggtitle("Exports of services by EBOPS category, 2013")+
theme_bw()+
theme(plot.title = element_text(face="bold",family=c("sans"),size=15),
legend.text=element_text(size=10),
axis.ticks=element_blank(),
axis.text=element_blank(),
axis.title=element_blank(),
panel.grid=element_blank(),
panel.border=element_blank())

p

graph2

 giphy