Population Pyramids of Georgia in ggplot2

You can download session 9 files for constructing the population pyramids of Georgia here: RLadies Tbilisi 

rm(list=ls(all=TRUE))
cat("\014")
mypath <- "/Users/GozerTheGozerian/Keymasters Folder/"
setwd(paste(mypath)) #set your working directory
##  [1] "Index"
##  [2] "Variant"
##  [3] "Major.area..region..country.or.area.."
##  [4] "sex"
##  [5] "Notes"
##  [6] "Country.code"
##  [7] "Reference.date..as.of.1.July."
##  [8] "X0.4"
##  [9] "X05.Sep"
## [10] "Oct.14"
## [11] "X15.19"
[...]
## [23] "X75.79"
## [24] "X80."
## [25] "X80.84"
## [26] "X85.89"
## [27] "X90.94"
## [28] "X95.99"
## [29] "X100."
head(pyr)
##   Index   Variant Major.area..region..country.or.area..  sex Notes
## 1     1 Estimates                                 WORLD both
## 2     2 Estimates                                 WORLD both
## 3     3 Estimates                                 WORLD both
## 4     4 Estimates                                 WORLD both
## 5     5 Estimates                                 WORLD both
## 6     6 Estimates                                 WORLD both
##   Country.code Reference.date..as.of.1.July.   X0.4 X05.Sep Oct.14 X15.19
## 1          900                          1950 337432  269550 260286 238628
## 2          900                          1955 402845  315055 263266 254815
## 3          900                          1960 430565  380319 309276 257899
## 4          900                          1965 477798  409020 372817 303891
## 5          900                          1970 522641  458298 403911 367789
## 6          900                          1975 543225  503753 452706 398384
##   X20.24 X25.29 X30.34 X35.39 X40.44 X45.49 X50.54 X55.59 X60.64 X65.69
## 1 221781 194424 166937 162917 147483 127415 107608  88601  73422  55106
## 2 231892 214878 187941 160385 155546 138743 119084  97441  76843  59322
## 3 248413 225957 208747 181632 153398 147699 130210 108435  85064  62665
## 4 251897 242692 219978 202499 174884 145701 138601 119505  95085  70256
## 5 297557 246921 237657 214330 196585 168438 137799 128954 107201  81023
## 6 361883 293531 243384 233137 209181 190299 161058 128755 116704  92385
##   X70.74 X75.79  X80. X80.84 X85.89 X90.94 X95.99 X100.
## 1  37360  21997 14202     NA     NA     NA     NA    NA
## 2  40346  23755 16158     NA     NA     NA     NA    NA
## 3  44018  25986 18061     NA     NA     NA     NA    NA
## 4  47382  29457 21032     NA     NA     NA     NA    NA
## 5  55168  32876 25340     NA     NA     NA     NA    NA
## 6  64337  38934 29743     NA     NA     NA     NA    NA
pyr <- read.csv("Session_2_POPULATION_BY_AGE_BOTH_SEXES.csv", header=T)
names(pyr)
##  [1] "Index"
##  [2] "Variant"
##  [3] "Major.area..region..country.or.area.."
##  [4] "sex"
##  [5] "Notes"
##  [6] "Country.code"
##  [7] "Reference.date..as.of.1.July."
##  [8] "X0.4"
##  [9] "X05.Sep"
## [10] "Oct.14"
## [11] "X15.19"
[...]
## [22] "X70.74"
## [23] "X75.79"
## [24] "X80."
## [25] "X80.84"
## [26] "X85.89"
## [27] "X90.94"
## [28] "X95.99"
## [29] "X100."
#make a new variable with names of all variables:
#make a new variable with names of all variables:
vars <- names(pyr)
#and change those variables names that start with an X
age <- c(paste(seq(0, 75, by=5), "-", seq(4, 79, by=5)), "80+", paste(seq(80, 95, by=5), "-", seq(84, 99, by=5)), "100+")
age
##  [1] "0 - 4"   "5 - 9"   "10 - 14" "15 - 19" "20 - 24" "25 - 29" "30 - 34"
##  [8] "35 - 39" "40 - 44" "45 - 49" "50 - 54" "55 - 59" "60 - 64" "65 - 69"
## [15] "70 - 74" "75 - 79" "80+"     "80 - 84" "85 - 89" "90 - 94" "95 - 99"
## [22] "100+"
names(pyr) <- c(vars[1], vars[2], "Major.Area", "sex", vars[5], vars[6], "year", age)
names(pyr)[1:15]
##  [1] "Index"        "Variant"      "Major.Area"   "sex"
##  [5] "Notes"        "Country.code" "year"         "0 - 4"
##  [9] "5 - 9"        "10 - 14"      "15 - 19"      "20 - 24"
## [13] "25 - 29"      "30 - 34"      "35 - 39"
library(tidyr)
# transform the data from wide to long format
pyr <- gather(pyr, "age.group", "value", 8:29)
head(pyr)
##   Index   Variant Major.Area  sex Notes Country.code year age.group  value
## 1     1 Estimates      WORLD both                900 1950     0 - 4 337432
## 2     2 Estimates      WORLD both                900 1955     0 - 4 402845
## 3     3 Estimates      WORLD both                900 1960     0 - 4 430565
## 4     4 Estimates      WORLD both                900 1965     0 - 4 477798
## 5     5 Estimates      WORLD both                900 1970     0 - 4 522641
## 6     6 Estimates      WORLD both                900 1975     0 - 4 543225
#replace all NA with 0
library(dplyr)
is.na(pyr$value) <- 0
pyr.g <- pyr %>%
 filter(Major.Area=="Georgia"&sex!="both") # exclude "both"

#create an order vector to sort data
o <- seq(1,22, by=1) # 22 is the number of age groups length(unique(pyr$age.group))
oo <- rep(o,28) # 28 number of years
order <- as.vector(sort(oo, decreasing=F))
pyr.g$order <- order
breaks <- pyr.g$age.group
library(ggplot2)
###
# get rid of the 80+ abridged age group
pyr.g1 <- pyr.g[-c(which(pyr.g$age.group=="80+")),]
### simple pyramid plot
p <- ggplot(pyr.g1, aes(x=age.group, y=value, fill=factor(sex)))+
geom_bar(data=pyr.g1 %>%
filter(sex=="female"&year=="2015"),
aes(x=reorder(age.group, order), y=value), stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="male"&year=="2015"),
aes(x=reorder(age.group, order), y=-value), stat="identity")+ #negative value for males not to overlap; reorder values of age group by order; "identity" is only for bar charts
coord_flip()+ #bending function: flip the coordinates
labs(x = "", y = "")+
scale_fill_manual(values = c(female = "red", male = "blue"), name="")+
scale_x_discrete(breaks=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+"),labels=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+" ))+ #not to show all the age groups all the time
scale_y_continuous(breaks=seq(-200,200,25),labels=abs(seq(-200,200,25)))+ #tell R t paste absolute numbers of values not to have negative values on graph
theme_bw()+
theme(axis.text.x = element_text(size=10, color="black"), # size of x axis text
axis.text.y = element_text(size=10, color="black"))

pyr1

#############################################
### STEP 2: add lines/bars to compare other years
#############################################
p+
geom_line(data=pyr.g1 %>%
filter(sex=="male"&year=="1975"),
aes(x=reorder(age.group, order), y=-value), colour="lightblue", group=1)+
geom_line(data=pyr.g1 %>%
filter(sex=="female"&year=="1975"),
aes(x=reorder(age.group, order), y=value), colour="pink", group=1)

pyr2

# bars: since in ggplot the last plot is
#the one that appears on top (hiding everything underneath),
#we can add alpha=0.5 to add some transparence, 1 being the
#full color
p+
geom_bar(data=pyr.g1 %>%
filter(sex=="male"&year=="1975"),
aes(x=reorder(age.group, order), y=-value), fill="lightblue", alpha=.5,stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="female"&year=="1975"),
aes(x=reorder(age.group, order), y=value), fill="pink", alpha=.5, stat="identity")

pyr3

#######################################################################
# STEP 3: add different legends for the two years: now we only have one for the sex, as the fill factors for all 4 geom_bar(s) is the same
#
ggplot(pyr.g1, aes(x=age.group, y=value, fill=factor(sex), col=factor(year)))+ # add different colors for the two years 1975 and 2015 by adding col=factor(year)
# this part stays the same
geom_bar(data=pyr.g1 %>%
filter(sex=="female"&year=="2015"),
aes(x=reorder(age.group, order), y=value), stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="male"&year=="2015"),
aes(x=reorder(age.group, order), y=-value), stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="male"&year=="1975"),
aes(x=reorder(age.group, order),y=-value), alpha=.5,stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="female"&year=="1975"),
aes(x=reorder(age.group, order), y=value), alpha=.5, stat="identity")+
coord_flip()+
labs(x = "", y = "")+
scale_x_discrete(breaks=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)) , "100+"),labels=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+" ))+
scale_y_continuous(breaks=seq(-200,200,25),labels=abs(seq(-200,200,25)))+
theme_bw()+
theme(axis.text.x = element_text(size=10, color="black"),
axis.text.y = element_text(size=10, color="black"))+
# add the legends with scale_fill_manual which controls the filling colors for sex and scale_color_manual which controls the border color that distinguisces the two years
scale_fill_manual(values = c(female = "red", male = "blue"), name="")+
scale_color_manual(values=c("1975"="black", "2015"="grey"), name="" )+
# and I want the year legend squares to look empty
guides(colour = guide_legend(override.aes = list(alpha = 0))) #makes the squares for the years legend empty of any color

pyr4

################################################################
## STEP 4: one pyramid plot for each year in one page with facet_wrap
##
ggplot(pyr.g1, aes(x=age.group, y=value, fill=factor(sex)))+
geom_bar(data=pyr.g1 %>%
filter(sex=="male"),
aes(x=reorder(age.group, order), y=-value), stat="identity")+
geom_bar(data=pyr.g1 %>%
filter(sex=="female"),
aes(x=reorder(age.group, order), y=value), stat="identity")+
coord_flip()+
labs(x = "", y = "")+
scale_x_discrete(breaks=c( paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+" ))+
scale_y_continuous(breaks=seq(-300,300,100),labels=abs(seq(-300,300,100)))+
scale_fill_manual(values = c(female = "red", male = "blue"), name="")+
theme_bw()+
theme(axis.text.x = element_text(size=10, color="black"),
axis.text.y = element_text(size=10, color="black"))+
facet_wrap(~year)

Untitled

pyr.ar <- pyr %>%
filter(Major.Area=="Armenia"&sex!="both") # exclude "both"
pyr.az <- pyr %>%
filter(Major.Area=="Azerbaijan"&sex!="both") # exclude "both"
pyr.ar$order <- order
pyr.az$order <- order
pyr.c <- rbind(pyr.g, pyr.ar, pyr.az)
pyr.c1 <- pyr.c[-c(which(pyr.c$age.group=="80+")),] 

ggplot(pyr.c1,
aes(x=age.group, y=value,
fill=factor(Major.Area)))+
 geom_bar(data=pyr.c1 %>%
filter(sex=="female"&year=="2015"),
aes(x=reorder(age.group, order), y=value), stat="identity")+
geom_bar(data=pyr.c1 %>%
filter(sex=="male"&year=="2015"),
aes(x=reorder(age.group, order), y=-value),
stat="identity")+
coord_flip()+
 labs(x = "", y = "")+
scale_x_discrete(breaks=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)) , "100+"),
labels=c(paste(seq(0,90, by=10),"-", seq(4,94, by=10)), "100+" ))+
 scale_y_continuous(breaks=seq(-400,400,200),
labels=abs(seq(-400,400,200)))+
 theme_bw()+
scale_fill_manual(values = c(Armenia = "red",
Georgia="green", Azerbaijan = "blue"), name="")+
theme(axis.text.x = element_text(size=10, color="black"),
axis.text.y = element_text(size=10, color="black"),
legend.position="none")+
facet_wrap(~Major.Area)
#facet_wrap(~Major.Area, scales="free_x")

Untitled3

And with scales=”free_x”

Untitled1


Advertisements

BAR CHART: a ggplot balance plot (2)

Merchandise trade balance plot in ggplot2

BAR CHART+LINE

Graph 2: Merchandise trade balance

You can find the data for this plot here or alternatively here is the dput data for balance:

structure(list(variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Merchandize Trade Balance", class = "factor"),
type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = "Balance", class = "factor"), year = c(2013L,
2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2013L), value = c(-0.5, -1.5, -0.1, -0.4, -0.2,
0, 0.1, -0.1, -0.6, -0.2, -0.2, -1.3, 0), geo = structure(c(2L,
4L, 7L, 9L, 1L, 6L, 12L, 5L, 3L, 11L, 10L, 13L, 8L), .Label = c("CIS",
"Dev. Asia Pacific", "Eastern Asia", "Europe", "Latin Am. And Carr.",
"North Africa", "North America", "Oceania", "South Eastern Europe",
"South-Eastern Asia", "Southern Asia", "Sub-Saharan Africa",
"Western Asia"), class = "factor")), .Names = c("variable",
"type", "year", "value", "geo"), class = "data.frame", row.names = c(NA,
-13L))
library(dplyr) #to manipulate the dataset
library(ggplot2) #plotting
mer.bal <- mydt %>%
filter(variable == "Merchandize Trade Balance")

base <- mer.bal %>%
filter(type != "Balance") %>%
mutate(
value = ifelse(type == "Exports", value, -value)
)
balance <- mer.bal %>%
filter(type == "Balance")

ggplot(balance, aes(x = geo, y = value, fill=factor(type))) +
geom_bar(data = base %>%
filter(type=="Exports"), aes(col=type), stat = "identity") +
geom_bar(data = base %>%
filter(type=="Imports"), aes(col=type), stat = "identity") +
geom_bar(data = balance, aes(col=type), stat = "identity", width=.2) +
ggtitle(expression(atop("Merchandise trade balance", atop(italic("(Bln US$ by MDG Regions in 2013)"), "")))) +
theme_bw()+
theme(axis.text.x = element_text(size=8, color="black"),
axis.text.y = element_text(size=8, color="black"),
legend.text=element_text(size=10),
plot.title = element_text(size = 20, face = "bold", colour = "black", vjust = -1))+
scale_fill_manual(values = c(Exports = "#0072B2", Imports = "#56B4E9", Balance="red"), name="") +
scale_colour_manual(values = c(Exports = "#0072B2", Imports = "#56B4E9", Balance="red"), name="") +
coord_flip()+
labs(x = "", y = "")

graph3