rlabuonora.com

Salvar raspando, un gráfico de barras anotado

En este posteo hago un gráfico de barras con anotaciones para mostrar un problema en el sistema de exámenes de las escuelas de Nueva York.

Diploma Regents

Los exámenes Regents son una serie de tests estándar de temas de secundaria en el estado de Nueva York. Este set de datos muestra las notas de exámenes en varias materias. Lo que llama la atención es que hay un gran número de exámenes con 65, la nota mínima de aprobación.

# Read nyc_regents.csv, declaring every column as a double up front so
# readr does not have to guess the column types.
nyc_regents <- read_csv(
  "nyc_regents.csv",
  col_types = cols(
    score = col_double(),
    integrated_algebra = col_double(),
    global_history = col_double(),
    living_environment = col_double(),
    english = col_double(),
    us_history = col_double()
  )
)

# Reshape: stack every column except `score` into long form (one row per
# score/subject pair), then add up the values of all subjects for each score.
# pivot_longer() replaces the superseded gather(), whose `k=`/`v=` arguments
# only worked through partial argument matching.
nyc_reshape <- nyc_regents %>%
  pivot_longer(cols = -score, names_to = "subject", values_to = "tests") %>%
  group_by(score) %>%
  summarize(tests = sum(tests, na.rm = TRUE)) # NA cells are skipped

# Basic plot: one bar per score, with bar height taken from `tests`.
g <- ggplot(data = nyc_reshape, mapping = aes(x = score, y = tests)) +
  geom_col()



# English-labelled version of the base plot: x breaks every 5 points with no
# padding around the data, and a right-hand y axis with comma-formatted labels.
g +
  scale_x_continuous(
    breaks = seq(from = 0, to = 100, by = 5),
    expand = expansion(add = c(0, 0))
  ) +
  scale_y_continuous(
    breaks = c(1e4, 2e4, 3e4),
    # `labels` accepts a function; number_format() builds one.
    labels = scales::number_format(big.mark = ","),
    position = "right"
  ) +
  labs(
    title = "Scraping By",
    subtitle = "2010 Regents scores on Algebra, \nGlobal History, Biology, English & US History",
    caption = "Source: New York City Department of Education",
    x = "",
    y = ""
  )

# Spanish-labelled version of the base plot with a minimal theme: dashed
# horizontal grid lines only, and axis text nudged via vjust.
g +
  labs(
    caption = "Fuente: New York City Department of Education",
    title = "Salvar Raspando",
    subtitle = "Notas 2010 Regents en Álgebra, \nHistoria, Biología, e Inglés",
    y = "",
    x = ""
  ) +
  scale_x_continuous(
    breaks = seq(0, 100, 5),
    expand = expansion(add = c(0, 0))
  ) +
  scale_y_continuous(
    position = "right",
    # `labels` accepts a function; number_format() builds one.
    labels = scales::number_format(big.mark = ","),
    breaks = c(1e4, 2e4, 3e4)
  ) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_line(linetype = "dashed", color = "gray50"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(vjust = 1.5),
    axis.text.x = element_text(vjust = 7),
    # margin() takes the four sides as separate numbers plus a `unit` string;
    # the previous call passed a whole unit vector as the `t` argument.
    # NOTE(review): points are assumed here because 20 cm per side would not
    # fit on a normal device -- confirm the intended size.
    plot.margin = margin(t = 20, r = 20, b = 20, l = 20, unit = "pt")
  )

# Add color to the columns: same plot as before, rebuilt from `nyc_reshape`
# with a black outline and a fixed fill on every bar.
ggplot(nyc_reshape, aes(score, tests)) +
  geom_col(color = "black", fill = "#C4843C") +
  labs(
    caption = "Fuente: New York City Department of Education",
    title = "Salvar Raspando",
    subtitle = "Notas 2010 Regents en Álgebra, \nHistoria, Biología, e Inglés",
    y = "",
    x = ""
  ) +
  scale_x_continuous(
    breaks = seq(0, 100, 5),
    expand = expansion(add = c(0, 0))
  ) +
  scale_y_continuous(
    position = "right",
    # `labels` accepts a function; number_format() builds one.
    labels = scales::number_format(big.mark = ","),
    breaks = c(1e4, 2e4, 3e4)
  ) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_line(linetype = "dashed", color = "gray50"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(vjust = 1.5),
    axis.text.x = element_text(vjust = 7),
    # margin() takes the four sides as separate numbers plus a `unit` string;
    # the previous call passed a whole unit vector as the `t` argument.
    # NOTE(review): points are assumed here because 20 cm per side would not
    # fit on a normal device -- confirm the intended size.
    plot.margin = margin(t = 20, r = 20, b = 20, l = 20, unit = "pt")
  )

# Everything together: reshape, aggregate and draw the annotated plot in a
# single pipeline.
nyc_regents %>%
  # Long form: one row per score/subject pair. pivot_longer() replaces the
  # superseded gather(), whose `k=`/`v=` arguments relied on partial matching.
  pivot_longer(cols = -score, names_to = "subject", values_to = "tests") %>%
  group_by(score) %>%
  summarize(tests = sum(tests, na.rm = TRUE)) %>%
  ggplot(aes(score, tests)) +
  # The annotations are added before geom_col() so the bars draw on top.
  # Shaded band from a score of 65 to the right edge of the panel.
  annotate(
    "rect",
    xmin = 65, xmax = Inf, ymin = 0, ymax = Inf,
    alpha = 0.6, fill = "grey80"
  ) +
  annotate(
    "text",
    label = "Mínimo para diploma Regents",
    x = 40, y = 2.5e4
  ) +
  # Curved arrow running from near the text label towards the 65 mark.
  annotate(
    "curve",
    curvature = -.2,
    arrow = arrow(type = "closed", length = unit(0.2, "cm")),
    x = 50, y = 2.8e4, xend = 64, yend = 3.2e4
  ) +
  geom_col(color = "black", fill = "#C4843C") +
  theme_minimal() +
  scale_x_continuous(
    breaks = seq(0, 100, 5),
    expand = expansion(add = c(0, 0))
  ) +
  scale_y_continuous(
    position = "right",
    # `labels` accepts a function; number_format() builds one.
    labels = scales::number_format(big.mark = ","),
    breaks = c(1e4, 2e4, 3e4)
  ) +
  # A second labs(y = "", x = "") call used to follow this one; it repeated
  # the same axis titles and has been dropped.
  labs(
    caption = "Fuente: New York City Department of Education",
    title = "Salvar Raspando",
    subtitle = "Notas Regents 2010 en Álgebra, \nHistoria, Biología, e Inglés",
    y = "",
    x = ""
  ) +
  theme(
    panel.grid.major.y = element_line(linetype = "dashed", color = "gray50"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(vjust = 1.5),
    axis.text.x = element_text(vjust = 7),
    # margin() takes the four sides as separate numbers plus a `unit` string;
    # the previous call passed a whole unit vector as the `t` argument.
    # NOTE(review): points are assumed here because 20 cm per side would not
    # fit on a normal device -- confirm the intended size.
    plot.margin = margin(t = 20, r = 20, b = 20, l = 20, unit = "pt")
  )