/covid19AgeModel/inst/R-utils/preprocess-contact-rates.r

https://github.com/ImperialCollegeLondon/covid19model · R · 159 lines · 95 code · 42 blank · 22 comment · 13 complexity · d1bdb16969d25c5825dc9de53f9607da MD5 · raw file

  1. library(tidyr)
  2. library(data.table)
  3. aggregate_contact_rates_period = function(countries, path_to_file_contact, age_bands, path_to_file_population){
  4. ## PARTS OF THIS CODE IS TAKEN FROM "Contact-patterns" repository BELONGING TO kassteele
  5. ## https://github.com/kassteele/Contact-patterns
  6. pop.data = read.csv(path_to_file_population)
  7. # create map to country abbrevation to read inputs
  8. country.abb = data.table(country = c("Belgium", "Germany", "Finland", "United_Kingdom", "Italy", "Luxembourg", "Netherlands", "Poland"),
  9. abb = c("BE", "DE", "FI", "GB", "IT", "LU", "NL", "PL"))
  10. # Get ages and number of age classes
  11. age <- 0:99
  12. n.age <- length(age)
  13. # Define age cateogries
  14. age.cat <- sapply(strsplit(gsub("\\[|\\]|\\(|\\+", "", age_bands$age), "-"), as.numeric)
  15. if (is.na(age.cat[[length(age.cat)]][2])) age.cat[[length(age.cat)]][2] <- 99
  16. age.cat <- matrix(unlist(age.cat), ncol = 2, byrow = TRUE)
  17. age.cat <- cut(age, breaks = c(age.cat[,1],100), include.highest = FALSE, right = FALSE)
  18. contact.tab.agg = list()
  19. for(Country in countries){
  20. pop.data.c = subset(pop.data, country == Country & age %in% 0:99)
  21. # Reshape pop.data in wide format.
  22. pop.data.wide <- cbind(
  23. data.frame(age = age),
  24. matrix(pop.data.c$pop, nrow = n.age, ncol = 1, dimnames = list(NULL, c("pop"))))
  25. pop.data.wide <- cbind(pop.data.wide, age.cat = age.cat)
  26. contact.tab.agg[[Country]] = list()
  27. for(period in c("weekday", "weekend")){
  28. load(path_to_file_contact(country.abb[which(country.abb$country == Country), ]$abb, period))
  29. contact.tab.c.p = polymod.tab
  30. # Create a dataframe contact.data with n.age^2 rows.
  31. contact.data <- cbind(
  32. expand.grid(part.age = age, cont.age = age),
  33. matrix(contact.tab.c.p$m, nrow = n.age^2, ncol = 1, dimnames = list(NULL, c("m"))))
  34. # Add age categories to contact.data and pop.data
  35. contact.data <- cbind(contact.data, expand.grid(part.age.cat = age.cat, cont.age.cat = age.cat))
  36. # Aggregate population numbers
  37. pop.data.agg <- aggregate(cbind(pop) ~ age.cat, FUN = sum, data = pop.data.wide)
  38. # Aggegrate contact intensities over ages
  39. record.id.part <- match(x = contact.data$part.age, table = pop.data.wide$age)
  40. record.id.part.agg <- match(x = contact.data$part.age.cat, table = pop.data.agg$age.cat)
  41. contact.data.agg <- within(contact.data, {
  42. m <- pop.data.wide[record.id.part, "pop" ]*m /pop.data.agg[record.id.part.agg, "pop" ]
  43. })
  44. contact.data.agg <- aggregate(cbind(m) ~ part.age.cat + cont.age.cat, FUN = sum, data = contact.data.agg)
  45. # Reorder columns
  46. contact.data.agg <- contact.data.agg[, c("part.age.cat", "cont.age.cat", "m")]
  47. # rename the age
  48. contact.data.agg = dplyr::rename(contact.data.agg, part.age = part.age.cat, cont.age = cont.age.cat)
  49. contact.tab.agg[[Country]][[period]] = contact.data.agg
  50. }
  51. }
  52. return(contact.tab.agg)
  53. }
  54. map_contact_tab_to_matrix_period = function(countries, contact_tab){
  55. contact_tab_matrix = list()
  56. for(Country in countries){
  57. contact_tab_matrix[[Country]] = list()
  58. for(period in c("weekend", "weekday")){
  59. contact_tab_matrix.c.p <- pivot_wider(contact_tab[[Country]][[period]], id_cols= part.age, names_from = cont.age, values_from = m)
  60. stopifnot( all( colnames(contact_tab_matrix.c.p)[-1] == contact_tab_matrix.c.p$part.age ) )
  61. contact_tab_matrix[[Country]][[period]] <- unname(as.matrix(contact_tab_matrix.c.p[,-1]))
  62. }
  63. }
  64. return(contact_tab_matrix)
  65. }
  66. aggregate_contact_rates = function(countries, contact_tab, age_bands, path_to_file_population){
  67. pop.data = read.csv(path_to_file_population)
  68. # Get ages and number of age classes
  69. age <- 0:99
  70. n.age <- length(age)
  71. # Define age cateogries
  72. #age.cat <- cut(age, breaks = seq(0, 100, cntct_by), include.highest = FALSE, right = FALSE)
  73. age.cat <- sapply(strsplit(gsub("\\[|\\]|\\(|\\+", "", age_bands$age), "-"), as.numeric)
  74. if (is.na(age.cat[[length(age.cat)]][2])) age.cat[[length(age.cat)]][2] <- 99
  75. age.cat <- matrix(unlist(age.cat), ncol = 2, byrow = TRUE)
  76. age.cat <- cut(age, breaks = c(age.cat[,1],100), include.highest = FALSE, right = FALSE)
  77. contact.tab.agg = list()
  78. for(Country in countries){
  79. pop.data.c = subset(pop.data, country == Country & age %in% 0:99)
  80. # Reshape pop.data in wide format.
  81. pop.data.wide <- cbind(
  82. data.frame(age = age),
  83. matrix(pop.data.c$pop, nrow = n.age, ncol = 1, dimnames = list(NULL, c("pop"))))
  84. pop.data.wide <- cbind(pop.data.wide, age.cat = age.cat)
  85. contact.tab.c = contact_tab[[Country]]
  86. # Create a dataframe contact.data with n.age^2 rows.
  87. contact.data <- cbind(
  88. expand.grid(part.age = age, cont.age = age),
  89. matrix(contact.tab.c$m, nrow = n.age^2, ncol = 1, dimnames = list(NULL, c("m"))))
  90. # Add age categories to contact.data and pop.data
  91. contact.data <- cbind(contact.data, expand.grid(part.age.cat = age.cat, cont.age.cat = age.cat))
  92. # Aggregate population numbers
  93. pop.data.agg <- aggregate(cbind(pop) ~ age.cat, FUN = sum, data = pop.data.wide)
  94. # Aggegrate contact intensities over ages
  95. record.id.part <- match(x = contact.data$part.age, table = pop.data.wide$age)
  96. record.id.part.agg <- match(x = contact.data$part.age.cat, table = pop.data.agg$age.cat)
  97. contact.data.agg <- within(contact.data, {
  98. m <- pop.data.wide[record.id.part, "pop" ]*m /pop.data.agg[record.id.part.agg, "pop" ]
  99. })
  100. contact.data.agg <- aggregate(cbind(m) ~ part.age.cat + cont.age.cat, FUN = sum, data = contact.data.agg)
  101. # Reorder columns
  102. contact.data.agg <- contact.data.agg[, c("part.age.cat", "cont.age.cat", "m")]
  103. # rename the age
  104. contact.data.agg = dplyr::rename(contact.data.agg, part.age = part.age.cat, cont.age = cont.age.cat)
  105. contact.tab.agg[[Country]] = contact.data.agg
  106. }
  107. return(contact.tab.agg)
  108. }
  109. map_contact_tab_to_matrix = function(countries, contact_tab){
  110. contact_tab_matrix = list()
  111. for(Country in countries){
  112. contact_tab_matrix.c <- pivot_wider(contact_tab[[Country]], id_cols= part.age, names_from = cont.age, values_from = m)
  113. stopifnot( all( colnames(contact_tab_matrix.c)[-1] == contact_tab_matrix.c$part.age ) )
  114. contact_tab_matrix[[Country]] <- unname(as.matrix(contact_tab_matrix.c[,-1]))
  115. }
  116. return(contact_tab_matrix)
  117. }