Summary: Assessment of CenSoc-DMF using the DMF itself as the population denominator. Since the DMF contains no covariates, the only thing we really do here is compute match rates (# of matched records / # of people in the DMF). Because the DMF has no sex information, but only men are matched, I use death counts from the Human Mortality Database (HMD) to approximate the number of male deaths in the DMF, and hence the male-specific match rate.
# Setup ----
library(cowplot)
library(ggsci)
library(data.table)
library(tidyverse)
library(here)

# Load data ----

# Full (unlinked) DMF; the original author's note gives N = 61,111,735.
dmf_full <- fread("/censoc/data/dmf/ssdm.csv")

# Linked CenSoc-DMF file.
dmf_linked <- fread("/censoc/data/censoc_v2.1/censoc_dmf_v2.1.csv")

# SSNs of everyone in the linked set, plus the subset of those records
# flagged as conservative links.
dmf_ssns <- dmf_linked$ssn
dmf_cons_ssns <- dmf_linked[link_abe_exact_conservative == 1, ssn]

# Flag linked records ----

# Mark each record of the full DMF as a standard and/or conservative link
# by looking its SSN up in the linked file.
dmf_full <- dmf_full %>%
  mutate(
    link_standard = ssn %in% dmf_ssns,
    link_conservative = ssn %in% dmf_cons_ssns
  )

# Keep only the death years used for linking (1975-2005); the original
# author's note gives N = 54,625,460 after this restriction.
dmf_full_date_restricted <- dmf_full %>%
  filter(dyear %in% 1975:2005)
# HMD death counts ----

# Read death counts; the code below relies on the columns Year, Age, Male
# and Total.
hmd <- fread("/home/ipums/mariaosborne-ipums/censoc/hmd_death_counts.txt")

# Recode the open-ended "110+" age group to 110 *before* converting to
# numeric, so that as.numeric() never sees "110+" and does not emit an
# "NAs introduced by coercion" warning. Then derive the birth cohort.
# The cohort is approximate because it is computed as year - age.
hmd <- hmd %>%
  mutate(
    Age = as.numeric(if_else(Age == "110+", "110", as.character(Age))),
    cohort = Year - Age
  )

# Male share of deaths (male / total) for each birth cohort 1910-1930,
# printed as one row per cohort. Cohorts with no HMD rows are omitted.
# NOTE(review): this is meant to describe deaths in 1975-2005, but no Year
# filter is applied here -- confirm that the input file is already
# restricted to those years, otherwise add filter(Year %in% 1975:2005).
hmd %>%
  filter(cohort %in% 1910:1930) %>%
  group_by(cohort) %>%
  summarize(sex_ratio = sum(Male) / sum(Total), .groups = "drop") %>%
  as.data.frame() %>%
  print()
(Date of death considered, sex NOT considered)
# Unadjusted match rate by birth year ----
# Share of DMF records (death years already restricted) that were linked,
# for birth years 1900-1935. The denominator is every DMF record in the
# birth year; no adjustment for sex is made here.
# Red = standard links, blue = conservative links.
dmf_full_date_restricted %>%
  filter(byear %in% 1900:1935) %>%
  group_by(byear) %>%
  summarize(
    match_rate = mean(link_standard),
    cons_match_rate = mean(link_conservative)
  ) %>%
  ggplot(aes(byear)) +
  geom_point(aes(y = match_rate), color = "red3") +
  geom_line(aes(y = match_rate), color = "red3") +
  geom_point(aes(y = cons_match_rate), color = "midnightblue") +
  geom_line(aes(y = cons_match_rate), color = "midnightblue") +
  ylim(0, 0.5) +
  theme_minimal()
# Per-cohort match counts and HMD sex ratio ----
# Builds one row per birth cohort with the number of standard matches,
# conservative matches, total DMF deaths, and the male share of deaths
# according to HMD. Computed with grouped summaries rather than by growing
# a data frame with rbind() inside a loop, so the large DMF table is
# scanned once instead of once per cohort.
cohorts <- 1880:1940

# Match and death counts per birth cohort in the date-restricted DMF.
dmf_counts_by_cohort <- dmf_full_date_restricted %>%
  filter(byear %in% cohorts) %>%
  group_by(byear) %>%
  summarize(
    num_std = sum(link_standard),
    num_cons = sum(link_conservative),
    num_deaths = n(),
    .groups = "drop"
  ) %>%
  rename(cohort = byear)

# Male share of deaths (male / total) per birth cohort in HMD.
hmd_ratio_by_cohort <- hmd %>%
  filter(cohort %in% cohorts) %>%
  group_by(cohort) %>%
  summarize(sex_ratio = sum(Male) / sum(Total), .groups = "drop")

# Start from the full cohort grid so that cohorts with no DMF records still
# get a row with zero counts. A cohort with no HMD rows gets a missing
# sex_ratio.
dmf_matches_sex <- tibble(cohort = cohorts) %>%
  left_join(dmf_counts_by_cohort, by = "cohort") %>%
  left_join(hmd_ratio_by_cohort, by = "cohort") %>%
  replace_na(list(num_std = 0L, num_cons = 0L, num_deaths = 0L)) %>%
  select(cohort, num_std, num_cons, num_deaths, sex_ratio) %>%
  as.data.frame()

# look at number of deaths, number of matches, and sex ratio per cohort
dmf_matches_sex
# Sex-adjusted match rates ----

# Multiply total deaths by the presumed sex ratio (share of deaths that are
# male according to HMD) to approximate the number of male deaths.
dmf_matches_sex <- dmf_matches_sex %>%
  mutate(num_male = num_deaths * sex_ratio)

# One table per link type, each with its match rate relative to the
# approximated number of male deaths.
dmf_matches_sex_standard <- dmf_matches_sex %>%
  mutate(
    match_rate = num_std / num_male,
    match_type = "standard"
  )

dmf_matches_sex_cons <- dmf_matches_sex %>%
  mutate(
    match_rate = num_cons / num_male,
    match_type = "conservative"
  )

# Stack both link types and plot the match rate by birth cohort (1900-1935).
dmf_match_rate_plot <- bind_rows(
  dmf_matches_sex_standard,
  dmf_matches_sex_cons
) %>%
  filter(cohort %in% c(1900:1935)) %>%
  ggplot(aes(
    x = cohort,
    y = match_rate,
    color = match_type,
    shape = match_type
  )) +
  geom_line(size = 1) +
  geom_point(size = 2.5) +
  theme_cowplot() +
  ggsci::scale_color_lancet() +
  labs(x = "Birth Cohort", y = "Match Rate") +
  ylim(0, .5) +
  theme(legend.position = "bottom", legend.title = element_blank())

dmf_match_rate_plot

# Write the figure to the vignette's figs directory.
ggsave(
  filename = here("vignettes/assess_match_quality/figs/dmf_denominator_match_rate_plot.pdf"),
  plot = dmf_match_rate_plot,
  height = 5,
  width = 7
)
# Sex ratio by birth cohort ----
# Plots the sex_ratio column of dmf_matches_sex for birth cohorts
# 1900-1935 and saves the figure.
sex_ratio_plot <- dmf_matches_sex %>%
  filter(cohort %in% 1900:1935) %>%
  ggplot(aes(cohort, sex_ratio)) +
  geom_point(size = 2.5) +
  geom_line(size = .75) +
  theme_cowplot() +
  ylim(0.3, 0.7) +
  labs(x = "Birth Cohort", y = "Sex Ratio")

sex_ratio_plot

# Write the figure to the vignette's figs directory.
ggsave(
  filename = here("vignettes/assess_match_quality/figs/dmf_sex_ratio_plot.pdf"),
  plot = sex_ratio_plot,
  height = 5,
  width = 7
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.