1 Reading in packages

rm(list=ls())
library(tidyverse)
library(ggplot2)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.4.3
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows

2 Reading in data

# Restore the processed data sets from disk. load() recreates whatever objects
# were saved in each file; presumably each file holds the object it is named
# after (df_mmfc and df_other are used further down) -- TODO confirm.
load("H:/processed_data/df_mmfc.rda")
load("H:/processed_data/df_other.rda")
load("H:/processed_data/df_noother.rda")

3 Some ad-hoc releveling

# Print the discipline labels currently present in the data.
levels(as.factor(df_mmfc$phd_disci))

# Fix the order of the PhD disciplines; labels not listed here become NA.
disci_levels <- c(
  "Health sciences", "Social sciences", "Natural sciences and mathematics",
  "Engineering", "Humanities", "Agriculture and animal sciences"
)
df_mmfc[["phd_disci"]] <- factor(df_mmfc[["phd_disci"]], levels = disci_levels)

# Recode gender (1 -> "men", any other non-missing value -> "women") and store
# it as a factor with "men" as the first level.
df_mmfc <- df_mmfc %>%
  mutate(
    gender = factor(ifelse(gender == 1, "men", "women"),
                    levels = c("men", "women"))
  )

4 Descriptives

4.1 Number of transitions

# Count, per gender, the rows flagged as a transition (trans_st == 1) and
# export the counts.
N_transitions <- df_mmfc %>%
  group_by(gender) %>%
  count(trans_st) %>%
  filter(trans_st == 1) %>%
  select(gender, N_transitions = n)

write.csv(N_transitions, file = "F:/GPE_salaris/N_transitions.csv")

5 Descriptives

5.1 Descriptive table by gender

# flattening time-varying variables (basepay)

# Column headers: 1% / 99% quantile, mean and SD for women (F) and men (M).
columns <- c("F: 1%", "F: 99%", "F: Mean", "F: SD", "M: 1%", "M: 99%", "M: Mean", "M: SD")
# Row labels of the descriptive table; the fill-in code below addresses these
# rows by position, so the order here must not change.
rows <- c("Real monthly pay", "Log real monthly pay", "Transition year", "Transition experienced","Time" , "PhD Discipline", "Health sciences", "Social sciences", "Natural sciences and mathematics", "Engineering", "Humanities", "Child under 5", "Log monthly contract hours", "Temporary contract", "PhD cohort", "PhD satisfaction", "Sector", "For-profit", "Government", "Non-profit", "Other job", "Break in Dutch employment", "Period abroad", "Partnered", "Time at transition", "N individuals", "N observations")

# Empty (all-NA) table with one row per label and one column per statistic.
t1 <- data.frame(matrix(NA, nrow = length(rows), ncol = length(columns)))
dimnames(t1) <- list(rows, columns)

# Per gender: 1% and 99% quantile, mean and SD of each continuous variable.
# The wide summary (columns named "<variable>-<statistic>") is reshaped to one
# row per gender x variable with columns p1, p99, mean and sd.
t1_contin <- df_mmfc %>%
  group_by(gender) %>%
  summarize(
    across(
      c(realpay_corr2, log_realpay, t, log_hrs, phd_coh, phd_sat, break_job, abroad_time),
      list(
        p1 = function(x) quantile(x, 0.01),
        p99 = function(x) quantile(x, 0.99),
        mean = mean,
        sd = sd
      ),
      .names = "{.col}-{.fn}"
    )
  ) %>%
  pivot_longer(
    cols = -gender,
    names_to = c("variable", "statistic"),
    names_sep = "-"
  ) %>%
  pivot_wider(names_from = statistic, values_from = value)


# Gender-specific subsets of the main data.
df_wom <- filter(df_mmfc, gender == "women")
df_men <- filter(df_mmfc, gender == "men")

# check if 1% and 99% of dummies is indeed 0/1
# c(quantile(df_men$trans_lt, 0.01), quantile(df_men$trans_lt, 0.99))
# c(quantile(df_men$trans_st, 0.01), quantile(df_men$trans_st, 0.99))
# c(quantile(df_men$child_u5, 0.01), quantile(df_men$child_u5, 0.99))
# c(quantile(df_men$temporary_emp, 0.01), quantile(df_men$temporary_emp, 0.99))
# c(quantile(df_men$sector_forpr, 0.01), quantile(df_men$sector_forpr, 0.99))
# c(quantile(df_men$sector_gov, 0.01), quantile(df_men$sector_gov, 0.99))
# c(quantile(df_men$sector_nonpr, 0.01), quantile(df_men$sector_nonpr, 0.99))
# c(quantile(df_men$otherjob, 0.01), quantile(df_men$otherjob, 0.99))
# c(quantile(df_men$partnered, 0.01), quantile(df_men$partnered, 0.99))
# c(quantile(df_wom$trans_lt, 0.01), quantile(df_wom$trans_lt, 0.99))
# c(quantile(df_wom$trans_st, 0.01), quantile(df_wom$trans_st, 0.99))
# c(quantile(df_wom$child_u5, 0.01), quantile(df_wom$child_u5, 0.99))
# c(quantile(df_wom$temporary_emp, 0.01), quantile(df_wom$temporary_emp, 0.99))
# c(quantile(df_wom$sector_forpr, 0.01), quantile(df_wom$sector_forpr, 0.99))
# c(quantile(df_wom$sector_gov, 0.01), quantile(df_wom$sector_gov, 0.99))
# c(quantile(df_wom$sector_nonpr, 0.01), quantile(df_wom$sector_nonpr, 0.99))
# c(quantile(df_wom$otherjob, 0.01), quantile(df_wom$otherjob, 0.99))
# c(quantile(df_wom$partnered, 0.01), quantile(df_wom$partnered, 0.99))
# it is!


# Dummy-variable rows get fixed 1% / 99% bounds for both genders. The values
# 0.0001 and 1.0001 round to 0 and 1 at two decimals.
dummy_rows <- c(3, 4, 7:12, 14, 18:21, 24)
t1[dummy_rows, c(1, 5)] <- 0.0001
t1[dummy_rows, c(2, 6)] <- 1.0001

# time at transition: keep only the rows in which a transition takes place
womtrans <- filter(df_wom, trans_st == 1)
mentrans <- filter(df_men, trans_st == 1)

# women: columns 1-4 of t1 (1%, 99%, mean, SD)

# Continuous variables. Rows 9-16 of t1_contin are the women's rows (gender is
# a factor with "men" first), in the order pay, log pay, time, contract hours,
# PhD cohort, PhD satisfaction, break in employment, period abroad.
t1[c(1, 2, 5, 13, 15, 16, 22, 23), 1:4] <- t1_contin[9:16, 3:6]

# Dummy variables: only the mean (share) is filled in.
wom_share_vars <- c("trans_st", "trans_lt", "child_u5", "temporary_emp",
                    "sector_forpr", "sector_gov", "sector_nonpr", "otherjob",
                    "partnered")
t1[c(3, 4, 12, 14, 18:21, 24), 3] <- unname(
  vapply(wom_share_vars, function(v) mean(df_wom[[v]]), numeric(1))
)

# Share of observations in each of the first five disciplines.
t1[7:11, 3] <- round(prop.table(table(df_wom$phd_disci)), digits = 2)[1:5]

# Time at transition, computed on the transition rows only.
t1[25, 1:4] <- c(quantile(womtrans$t, c(0.01, 0.99)), mean(womtrans$t), sd(womtrans$t))

# Number of distinct persons and number of rows.
t1[26:27, 3] <- c(sum(!duplicated(df_wom$RINPERSOON)), nrow(df_wom))

# men: columns 5-8 of t1 (1%, 99%, mean, SD)

# Continuous variables. Rows 1-8 of t1_contin are the men's rows (gender is a
# factor with "men" first), in the order pay, log pay, time, contract hours,
# PhD cohort, PhD satisfaction, break in employment, period abroad.
t1[c(1, 2, 5, 13, 15, 16, 22, 23), 5:8] <- t1_contin[1:8, 3:6]

# Dummy variables: only the mean (share) is filled in.
men_share_vars <- c("trans_st", "trans_lt", "child_u5", "temporary_emp",
                    "sector_forpr", "sector_gov", "sector_nonpr", "otherjob",
                    "partnered")
t1[c(3, 4, 12, 14, 18:21, 24), 7] <- unname(
  vapply(men_share_vars, function(v) mean(df_men[[v]]), numeric(1))
)

# Share of observations in each of the first five disciplines.
t1[7:11, 7] <- round(prop.table(table(df_men$phd_disci)), digits = 2)[1:5]

# Time at transition, computed on the transition rows only.
t1[25, 5:8] <- c(quantile(mentrans$t, c(0.01, 0.99)), mean(mentrans$t), sd(mentrans$t))

# Number of distinct persons and number of rows.
t1[26:27, 7] <- c(sum(!duplicated(df_men$RINPERSOON)), nrow(df_men))

# Statistics (rows 1-25) are rounded to two decimals; the two count rows
# (N individuals, N observations) are made non-negative whole numbers.
t1[1:25, ] <- round(t1[1:25, ], 2)
t1[26:27, ] <- round(abs(t1[26:27, ]), 0)

# Show empty cells instead of NA (this turns the columns into character).
t1[is.na(t1)] <- ""

Descriptives_Table <- t1

write.csv(t1, file = "F:/GPE_salaris/R&R/descriptive_tab.csv")
# Read the descriptive table from Table1.csv in the working directory.
# NOTE(review): this is not the file written just above
# (descriptive_tab.csv); presumably an exported copy -- confirm.
Table1 <- read.csv("Table1.csv", header = TRUE, check.names = FALSE)

# Blank out missing cells for display.
Table1[is.na(Table1)] <- ""

kable_styling(
  kable(Table1, caption = "<b>Table 1: descriptive statistics for all variables used in our analyses, split out for men and women</b>"),
  bootstrap_options = c("striped", "hover", "condensed", "responsive")
)
Table 1: descriptive statistics for all variables used in our analyses, split out for men and women
F: 1% F: 99% F: Mean F: SD M: 1% M: 99% M: Mean M: SD
Real monthly pay 1085.08 15324.76 4867.1 2776.15 1441.4 17133.96 5529.32 3187.54
Log real monthly pay 6.99 9.64 8.36 0.52 7.27 9.75 8.48 0.51
Transition year 0 1 0.05 0 1 0.05
Transition experienced 0 1 0.32 0 1 0.35
Time 0 16 5.91 4.26 0 16 6.25 4.39
PhD Discipline
Health sciences 0 1 0.43 0 1 0.27
Social sciences 0 1 0.31 0 1 0.23
Natural sciences and mathematics 0 1 0.16 0 1 0.29
Engineering 0 1 0.05 0 1 0.14
Humanities 0 1 0.06 0 1 0.06
Child under 5 0 1 0.33 0 1 0.32
Log monthly contract hours 3.56 5.29 4.93 0.29 4.13 5.29 5.02 0.22
Temporary contract 0 1 0.39 0 1 0.34
PhD cohort 0 12 4.51 3.11 0 11 3.9 2.93
PhD satisfaction 2.12 4 3.14 0.38 2.25 4 3.22 0.37
Sector
For-profit 0 1 0.38 0 1 0.42
Government 0 1 0.48 0 1 0.48
Non-profit 0 1 0.14 0 1 0.1
Other job 0 1 0.06 0 1 0.06
Break in Dutch employment 0 16.99 0.9 4.83 0 16.46 0.84 4.78
Period abroad 0 0 0.43 6.52 0 24.31 0.78 7.97
Partnered 0 1 0.5 0 1 0.58
Time at transition 1 14 3.97 3.13 1 13.54 3.59 3.04
N individuals 2298 2278
N observations 23903 24883

6 Robustness: descriptive statistics of people who go abroad after their PhD

# Restore the data on PhDs who went abroad after their PhD (used below as
# abroadafterphd_long).
load("H:/processed_data/abroadafterphd_long.rda")

# Print the discipline labels currently present in the data.
levels(as.factor(abroadafterphd_long$phd_disci))

# Fix the order of the PhD disciplines; labels not listed here become NA.
abroad_disci_levels <- c(
  "Health sciences", "Social sciences", "Natural sciences and mathematics",
  "Engineering", "Humanities"
)
abroadafterphd_long[["phd_disci"]] <- factor(
  abroadafterphd_long[["phd_disci"]],
  levels = abroad_disci_levels
)

# Recode gender (1 -> "men", any other non-missing value -> "women") and store
# it as a factor with "men" as the first level.
abroadafterphd_long <- abroadafterphd_long %>%
  mutate(
    gender = factor(ifelse(gender == 1, "men", "women"),
                    levels = c("men", "women"))
  )

# removing missings
# number of distinct persons before filtering
sum(!duplicated(abroadafterphd_long$RINPERSOON)) # 353

# keep only rows with PhD satisfaction, contract type and sector all observed
abroadafterphd_long <- abroadafterphd_long %>%
  filter(!is.na(phd_sat), !is.na(temporary_emp), !is.na(sect_adj))

# number of distinct persons after filtering
sum(!duplicated(abroadafterphd_long$RINPERSOON)) # 352

# flattening time-varying variables (basepay)

# Column headers: 1% / 99% quantile, mean and SD for women (F) and men (M).
columns <- c("F: 1%", "F: 99%", "F: Mean", "F: SD", "M: 1%", "M: 99%", "M: Mean", "M: SD")
# Row labels of the descriptive table; the fill-in code below addresses these
# rows by position, so the order here must not change.
rows <- c("Real monthly pay", "Log real monthly pay", "Transition year", "Transition experienced","Time" , "PhD Discipline", "Health sciences", "Social sciences", "Natural sciences and mathematics", "Engineering", "Humanities", "Child under 5", "Log monthly contract hours", "Temporary contract", "PhD cohort", "PhD satisfaction", "Sector", "For-profit", "Government", "Non-profit", "Other job", "Break in Dutch employment", "Period abroad", "Partnered", "Time at transition", "N individuals", "N observations")

# Empty (all-NA) table with one row per label and one column per statistic.
t1_abroad <- data.frame(matrix(NA, nrow = length(rows), ncol = length(columns)))
dimnames(t1_abroad) <- list(rows, columns)

# Per gender: 1% and 99% quantile, mean and SD of each continuous variable.
# The wide summary (columns named "<variable>-<statistic>") is reshaped to one
# row per gender x variable with columns p1, p99, mean and sd.
#
# The fourth variable is log_hrs (not basehours_month): its statistics are
# placed in the row labelled "Log monthly contract hours", and both the main
# descriptive table and the t-tests further down use log_hrs. The position of
# the variable in the list is unchanged, so the positional look-ups below
# still hit the same rows.
abroadafterphd_long %>%
  group_by(gender) %>%
  summarize(across(c(realpay_corr2, log_realpay, t, log_hrs, phd_coh, phd_sat, break_job, abroad_time),
            list(p1 = ~quantile(.x, 0.01), p99 = ~quantile(.x, 0.99), mean = mean, sd=sd),
            .names = "{.col}-{.fn}")) %>%
  pivot_longer(
    cols = -gender,
    names_to = c("variable", "statistic"), 
    names_sep = "-"
  ) %>%
  pivot_wider(
    names_from = statistic,
    values_from = value
  ) -> t1_abroad_contin


# Gender-specific subsets of the abroad-after-PhD data. These reuse (and
# overwrite) the names df_wom / df_men.
df_wom <- filter(abroadafterphd_long, gender == "women")
df_men <- filter(abroadafterphd_long, gender == "men")

# check if 1% and 99% of dummies is indeed 0/1
# c(quantile(df_men$trans_lt, 0.01), quantile(df_men$trans_lt, 0.99))
# c(quantile(df_men$trans_st, 0.01), quantile(df_men$trans_st, 0.99))
# c(quantile(df_men$child_u5, 0.01), quantile(df_men$child_u5, 0.99))
# c(quantile(df_men$temporary_emp, 0.01), quantile(df_men$temporary_emp, 0.99))
# c(quantile(df_men$sector_forpr, 0.01), quantile(df_men$sector_forpr, 0.99))
# c(quantile(df_men$sector_gov, 0.01), quantile(df_men$sector_gov, 0.99))
# c(quantile(df_men$sector_nonpr, 0.01), quantile(df_men$sector_nonpr, 0.99))
# c(quantile(df_men$otherjob, 0.01), quantile(df_men$otherjob, 0.99))
# c(quantile(df_men$partnered, 0.01), quantile(df_men$partnered, 0.99))
# c(quantile(df_wom$trans_lt, 0.01), quantile(df_wom$trans_lt, 0.99))
# c(quantile(df_wom$trans_st, 0.01), quantile(df_wom$trans_st, 0.99))
# c(quantile(df_wom$child_u5, 0.01), quantile(df_wom$child_u5, 0.99))
# c(quantile(df_wom$temporary_emp, 0.01), quantile(df_wom$temporary_emp, 0.99))
# c(quantile(df_wom$sector_forpr, 0.01), quantile(df_wom$sector_forpr, 0.99))
# c(quantile(df_wom$sector_gov, 0.01), quantile(df_wom$sector_gov, 0.99))
# c(quantile(df_wom$sector_nonpr, 0.01), quantile(df_wom$sector_nonpr, 0.99))
# c(quantile(df_wom$otherjob, 0.01), quantile(df_wom$otherjob, 0.99))
# c(quantile(df_wom$partnered, 0.01), quantile(df_wom$partnered, 0.99))
# it is!


# Dummy-variable rows get fixed 1% / 99% bounds for both genders. The values
# 0.0001 and 1.0001 round to 0 and 1 at two decimals.
abroad_dummy_rows <- c(3, 4, 7:12, 14, 18:21, 24)
t1_abroad[abroad_dummy_rows, c(1, 5)] <- 0.0001
t1_abroad[abroad_dummy_rows, c(2, 6)] <- 1.0001

# time at transition: keep only the rows in which a transition takes place
womtrans <- filter(df_wom, trans_st == 1)
mentrans <- filter(df_men, trans_st == 1)

# women: columns 1-4 of t1_abroad (1%, 99%, mean, SD)

# Continuous variables. Rows 9-16 of t1_abroad_contin are the women's rows
# (gender is a factor with "men" first), in the order pay, log pay, time,
# contract hours, PhD cohort, PhD satisfaction, break in employment, period
# abroad.
t1_abroad[c(1, 2, 5, 13, 15, 16, 22, 23), 1:4] <- t1_abroad_contin[9:16, 3:6]

# Dummy variables: only the mean (share) is filled in.
wom_share_vars <- c("trans_st", "trans_lt", "child_u5", "temporary_emp",
                    "sector_forpr", "sector_gov", "sector_nonpr", "otherjob",
                    "partnered")
t1_abroad[c(3, 4, 12, 14, 18:21, 24), 3] <- unname(
  vapply(wom_share_vars, function(v) mean(df_wom[[v]]), numeric(1))
)

# Share of observations in each of the five disciplines.
t1_abroad[7:11, 3] <- round(prop.table(table(df_wom$phd_disci)), digits = 2)[1:5]

# Time at transition, computed on the transition rows only.
t1_abroad[25, 1:4] <- c(quantile(womtrans$t, c(0.01, 0.99)), mean(womtrans$t), sd(womtrans$t))

# Number of distinct persons and number of rows.
t1_abroad[26:27, 3] <- c(sum(!duplicated(df_wom$RINPERSOON)), nrow(df_wom))

# men: columns 5-8 of t1_abroad (1%, 99%, mean, SD)

# Continuous variables. Rows 1-8 of t1_abroad_contin are the men's rows
# (gender is a factor with "men" first), in the order pay, log pay, time,
# contract hours, PhD cohort, PhD satisfaction, break in employment, period
# abroad.
t1_abroad[c(1, 2, 5, 13, 15, 16, 22, 23), 5:8] <- t1_abroad_contin[1:8, 3:6]

# Dummy variables: only the mean (share) is filled in.
men_share_vars <- c("trans_st", "trans_lt", "child_u5", "temporary_emp",
                    "sector_forpr", "sector_gov", "sector_nonpr", "otherjob",
                    "partnered")
t1_abroad[c(3, 4, 12, 14, 18:21, 24), 7] <- unname(
  vapply(men_share_vars, function(v) mean(df_men[[v]]), numeric(1))
)

# Share of observations in each of the five disciplines.
t1_abroad[7:11, 7] <- round(prop.table(table(df_men$phd_disci)), digits = 2)[1:5]

# Time at transition, computed on the transition rows only.
t1_abroad[25, 5:8] <- c(quantile(mentrans$t, c(0.01, 0.99)), mean(mentrans$t), sd(mentrans$t))

# Number of distinct persons and number of rows.
t1_abroad[26:27, 7] <- c(sum(!duplicated(df_men$RINPERSOON)), nrow(df_men))

# Statistics (rows 1-25) are rounded to two decimals; the two count rows
# (N individuals, N observations) are made non-negative whole numbers.
t1_abroad[1:25, ] <- round(t1_abroad[1:25, ], 2)
t1_abroad[26:27, ] <- round(abs(t1_abroad[26:27, ]), 0)

# Show empty cells instead of NA (this turns the columns into character).
t1_abroad[is.na(t1_abroad)] <- ""

# Rows: drop the transition rows that don't make sense for this sample
# ("Transition year", row 3; "Time at transition", row 25).
# NOTE(review): row 24 ("Partnered") is dropped by this selection too --
# confirm that this is intended.
# Columns: keep only mean and SD (no min/max due to sample size restrictions).
t1_abroad <- t1_abroad[c(1:2, 4:23, 26:27), c(3, 4, 7, 8)]

# Note: this reuses (and overwrites) the name Descriptives_Table.
Descriptives_Table <- t1_abroad

write.csv(t1_abroad, file = "F:/GPE_salaris/R&R/descriptive_tab_abroadphd.csv")
# Read the abroad-after-PhD table from Table1_abroadphd.csv in the working
# directory.
# NOTE(review): this is not the file written just above
# (descriptive_tab_abroadphd.csv); presumably an exported copy -- confirm.
Table1_abroadphd <- read.csv("Table1_abroadphd.csv", header = TRUE, check.names = FALSE)

# Blank out missing cells for display.
Table1_abroadphd[is.na(Table1_abroadphd)] <- ""

kable_styling(
  kable(Table1_abroadphd, caption = "<b>Table: descriptive statistics for PhDs who went abroad after their PhD</b>"),
  bootstrap_options = c("striped", "hover", "condensed", "responsive")
)
Table: descriptive statistics for PhDs who went abroad after their PhD
F: Mean F: SD M: Mean M: SD
Real monthly pay 4858.36 2015.94 5562.04 2651.9
Log real monthly pay 8.41 0.41 8.53 0.43
Transition experienced 0.58 0.55
Time 8.36 3.85 8.52 3.84
PhD Discipline
Health sciences 0.24 0.18
Social sciences 0.18 0.07
Natural sciences and mathematics 0.52 0.58
Engineering 0.05 0.13
Humanities 0.01 0.04
Child under 5 0.38 0.36
Log monthly contract hours 4.97 0.24 5.05 0.19
Temporary contract 0.37 0.37
PhD cohort 3.36 2.82 2.87 2.55
PhD satisfaction 3.21 0.35 3.23 0.42
Sector
For-profit 0.48 0.52
Government 0.42 0.41
Non-profit 0.09 0.07
Other job 0.02 0.02
Break in Dutch employment 0.87 4.41 0.61 3.05
Period abroad 6.95 20.91 9.16 27.04
N individuals 130 222
N observations 1258 2199

T-test table: are people who go abroad after their PhD different from those who do not?

# chi-square gender distri:
# Gender counts at person level: only the first row per RINPERSOON is used.
persons_by_gender <- function(dat) {
  table(dat$gender[!duplicated(dat$RINPERSOON)])
}
gender_reg <- persons_by_gender(df_mmfc)
gender_abr <- persons_by_gender(abroadafterphd_long)

# 2 x 2 table: sample (rows) by gender (columns).
gender_distri <- rbind.data.frame(gender_reg, gender_abr)
dimnames(gender_distri) <- list(
  c("Main data", "Abroad after PhD"),
  c("Men", "Women")
)

chisq_gender <- chisq.test(gender_distri)

# Append the test statistic, degrees of freedom and p-value to every row.
gender_distri$chisq <- chisq_gender[["statistic"]]
gender_distri$chisq_df <- chisq_gender[["parameter"]]
gender_distri$chisq_p <- chisq_gender[["p.value"]]

write.csv(gender_distri, file = "F:/GPE_salaris/R&R/gender_distri_abroad.csv")

# Add one 0/1 indicator per PhD discipline (NA where phd_disci is NA).
add_disci_dummies <- function(dat) {
  mutate(
    dat,
    disci_health = ifelse(phd_disci == "Health sciences", 1, 0),
    disci_social = ifelse(phd_disci == "Social sciences", 1, 0),
    disci_natural = ifelse(phd_disci == "Natural sciences and mathematics", 1, 0),
    disci_engineering = ifelse(phd_disci == "Engineering", 1, 0),
    disci_humanities = ifelse(phd_disci == "Humanities", 1, 0)
  )
}

df_mmfc <- add_disci_dummies(df_mmfc)
abroadafterphd_long <- add_disci_dummies(abroadafterphd_long)



# Gender-specific subsets of the main data (df_wom, df_men) and of the
# abroad-after-PhD data (df_wom_a, df_men_a) for the t-tests.
df_wom <- filter(df_mmfc, gender == "women")
df_men <- filter(df_mmfc, gender == "men")
df_wom_a <- filter(abroadafterphd_long, gender == "women")
df_men_a <- filter(abroadafterphd_long, gender == "men")

# Two-sample t-tests (t.test() defaults) comparing the main sample with the
# abroad-after-PhD sample, separately for women (suffix _v) and men (suffix
# _m). The order of the variables fixes the numbering t1 ... t20 and must stay
# aligned with the label vector `varnames` used when the results are
# collected.
# NOTE(review): the tests run on all rows of the long data, so repeated rows
# of the same person are treated as independent observations -- confirm that
# this is intended.
ttest_vars <- c(
  "realpay_corr2", "log_realpay", "t",
  "disci_health", "disci_social", "disci_natural", "disci_engineering",
  "disci_humanities",
  "child_u5", "log_hrs", "temporary_emp", "phd_coh", "phd_sat",
  "sector_forpr", "sector_gov", "sector_nonpr",
  "otherjob", "break_job", "abroad_time", "partnered"
)

# Run one t-test per variable in `ttest_vars`.
#   main_df, abroad_df    : the two samples to compare
#   main_name, abroad_name: object names, used only to label the test result
#   suffix                : "v" or "m"; results are named t<i>_<suffix>
# Returns a named list of htest objects.
compare_main_abroad <- function(main_df, abroad_df, main_name, abroad_name, suffix) {
  tests <- lapply(ttest_vars, function(v) {
    res <- t.test(main_df[[v]], abroad_df[[v]])
    # keep the label the hand-written call t.test(df$var, df_a$var) would give
    res$data.name <- paste0(main_name, "$", v, " and ", abroad_name, "$", v)
    res
  })
  names(tests) <- paste0("t", seq_along(ttest_vars), "_", suffix)
  tests
}

# Materialise the results as t1_v ... t20_v and t1_m ... t20_m in the current
# environment, because later code collects them by name with mget().
list2env(
  compare_main_abroad(df_wom, df_wom_a, "df_wom", "df_wom_a", "v"),
  envir = environment()
)
list2env(
  compare_main_abroad(df_men, df_men_a, "df_men", "df_men_a", "m"),
  envir = environment()
)

# Pull the test statistic, degrees of freedom and p-value out of a t.test()
# result and return them as a one-row tibble (columns t_value, df, p_value).
extract_t <- function(ttest) {
  parts <- ttest[c("statistic", "parameter", "p.value")]
  tibble(
    t_value = unname(parts[["statistic"]]),
    df = unname(parts[["parameter"]]),
    p_value = unname(parts[["p.value"]])
  )
}

# Collect the 20 test objects per gender by name (t1_v ... t20_v for women,
# t1_m ... t20_m for men).
test_wom <- mget(sprintf("t%d_v", 1:20))
test_men <- mget(sprintf("t%d_m", 1:20))

# Display labels, in the same order as the tests t1 ... t20.
varnames <- c("Real monthly pay", "Log real monthly pay", "Time", "PhD discipline: Health sciences", "PhD discipline: Social sciences", "PhD discipline: Natural sciences & mathematics", "PhD discipline: Engineering", "PhD discipline: Humanities", "Child under 5", "Log Monthly contract hours", "Temporary contract", "PhD cohort", "PhD satisfaction", "Sector: For-profit", "Sector: Government", "Sector:Non-profit", "Other job", "Break in Dutch employment", "Period abroad", "Partnered")

# One row per test; N_obs is the number of rows in both samples combined.
test_df_wom <- test_wom %>%
  lapply(extract_t) %>%
  bind_rows(.id = "test_id") %>%
  mutate(
    variable = varnames,
    N_obs = (nrow(df_wom) + nrow(df_wom_a)),
    group = "women"
  )

test_df_men <- test_men %>%
  lapply(extract_t) %>%
  bind_rows(.id = "test_id") %>%
  mutate(
    variable = varnames,
    N_obs = (nrow(df_men) + nrow(df_men_a)),
    group = "men"
  )

# Men first, then women.
test_df_all <- bind_rows(test_df_men, test_df_wom)

# Flag p-values below .001; these are reported as "p < .001".
test_df_all$psmall <- ifelse(test_df_all$p_value < .001, 1, 0)

# Text version of each test: "t(df) = t, p = ...".
# Numbers use a fixed number of decimals (formatC) instead of round():
#  * p-values get three decimals, so a value between .001 and .005 is no
#    longer printed as "p = 0" (0.0036 now reads "p = 0.004");
#  * t and df always show two decimals, so trailing zeros are kept
#    ("t(2536.90)", "1.40").
test_df_all$ttest <- paste0(
  "t(", formatC(test_df_all$df, format = "f", digits = 2), ") = ",
  formatC(test_df_all$t_value, format = "f", digits = 2),
  ", p ",
  ifelse(
    test_df_all$psmall == 1,
    "< .001",
    paste0("= ", formatC(test_df_all$p_value, format = "f", digits = 3))
  )
)


write.csv(test_df_all, file="F:/GPE_salaris/R&R/ttests_main_abroad.csv")
# Read the t-test table from ttests_main_abroad.csv in the working directory
# and print it without its first column.
ttests_mainabroad <- read.csv("ttests_main_abroad.csv", header = TRUE, check.names = FALSE)

ttests_mainabroad[, -1]
##    test_id      t_value       df       p_value
## 1     t1_m  -0.54482884 2791.092  5.859148e-01
## 2     t2_m  -4.91808711 2782.432  9.247609e-07
## 3     t3_m -26.28683748 2731.112 5.864767e-136
## 4     t4_m  10.58758396 2745.622  1.057409e-25
## 5     t5_m  26.88090259 3393.241 1.843293e-144
## 6     t6_m -26.59154389 2536.902 1.219750e-137
## 7     t7_m   1.23312866 2626.082  2.176381e-01
## 8     t8_m   6.41187345 2901.868  1.672075e-10
## 9     t9_m  -3.72901434 2578.664  1.963702e-04
## 10   t10_m  -7.48455912 2787.712  9.570685e-14
## 11   t11_m  -2.91269103 2585.656  3.614001e-03
## 12   t12_m  17.78638679 2734.406  4.679438e-67
## 13   t13_m  -1.17182236 2502.966  2.413799e-01
## 14   t14_m  -9.06312947 2590.921  2.435695e-19
## 15   t15_m   6.30574697 2613.798  3.356219e-10
## 16   t16_m   5.56488385 2780.548  2.873351e-08
## 17   t17_m  10.47898127 3288.980  2.688617e-25
## 18   t18_m   3.26277165 3242.145  1.114724e-03
## 19   t19_m -14.48310051 2231.901  1.677765e-45
## 20   t20_m  -6.02140886 2629.183  1.970343e-09
## 21    t1_v   0.14665298 1519.638  8.834254e-01
## 22    t2_v  -4.31270458 1473.152  1.719924e-05
## 23    t3_v -21.85057698 1424.003  1.641672e-91
## 24    t4_v  15.27586101 1440.897  6.090731e-49
## 25    t5_v  11.46403537 1455.524  3.431097e-29
## 26    t6_v -25.51582615 1327.420 3.507584e-117
## 27    t7_v   0.03913606 1393.127  9.687875e-01
## 28    t8_v  13.71807152 1973.628  5.632661e-41
## 29    t9_v  -3.46311953 1384.384  5.502304e-04
## 30   t10_v  -6.49330210 1447.355  1.151181e-10
## 31   t11_v   1.40339762 1395.222  1.607208e-01
## 32   t12_v  13.98859348 1422.669  9.322857e-42
## 33   t13_v  -7.10179205 1418.733  1.941450e-12
## 34   t14_v  -7.38925817 1384.287  2.543458e-13
## 35   t15_v   4.33908548 1395.738  1.533749e-05
## 36   t16_v   5.25756723 1449.498  1.678162e-07
## 37   t17_v  11.39910565 1751.582  4.353029e-29
## 38   t18_v   0.25543702 1419.811  7.984226e-01
## 39   t19_v -11.03042334 1269.902  4.420890e-27
## 40   t20_v  -0.64049890 1392.486  5.219537e-01
##                                          variable N_obs group psmall
## 1                                Real monthly pay 27082   men      0
## 2                            Log real monthly pay 27082   men      1
## 3                                            Time 27082   men      1
## 4                 PhD discipline: Health sciences 27082   men      1
## 5                 PhD discipline: Social sciences 27082   men      1
## 6  PhD discipline: Natural sciences & mathematics 27082   men      1
## 7                     PhD discipline: Engineering 27082   men      0
## 8                      PhD discipline: Humanities 27082   men      1
## 9                                   Child under 5 27082   men      1
## 10                     Log Monthly contract hours 27082   men      1
## 11                             Temporary contract 27082   men      0
## 12                                     PhD cohort 27082   men      1
## 13                               PhD satisfaction 27082   men      0
## 14                             Sector: For-profit 27082   men      1
## 15                             Sector: Government 27082   men      1
## 16                              Sector:Non-profit 27082   men      1
## 17                                      Other job 27082   men      1
## 18                      Break in Dutch employment 27082   men      0
## 19                                  Period abroad 27082   men      1
## 20                                      Partnered 27082   men      1
## 21                               Real monthly pay 25161 women      0
## 22                           Log real monthly pay 25161 women      1
## 23                                           Time 25161 women      1
## 24                PhD discipline: Health sciences 25161 women      1
## 25                PhD discipline: Social sciences 25161 women      1
## 26 PhD discipline: Natural sciences & mathematics 25161 women      1
## 27                    PhD discipline: Engineering 25161 women      0
## 28                     PhD discipline: Humanities 25161 women      1
## 29                                  Child under 5 25161 women      1
## 30                     Log Monthly contract hours 25161 women      1
## 31                             Temporary contract 25161 women      0
## 32                                     PhD cohort 25161 women      1
## 33                               PhD satisfaction 25161 women      1
## 34                             Sector: For-profit 25161 women      1
## 35                             Sector: Government 25161 women      1
## 36                              Sector:Non-profit 25161 women      1
## 37                                      Other job 25161 women      1
## 38                      Break in Dutch employment 25161 women      0
## 39                                  Period abroad 25161 women      1
## 40                                      Partnered 25161 women      0
##                            ttest
## 1   t(2791.09) = -0.54, p = 0.59
## 2   t(2782.43) = -4.92, p < .001
## 3  t(2731.11) = -26.29, p < .001
## 4   t(2745.62) = 10.59, p < .001
## 5   t(3393.24) = 26.88, p < .001
## 6   t(2536.9) = -26.59, p < .001
## 7    t(2626.08) = 1.23, p = 0.22
## 8    t(2901.87) = 6.41, p < .001
## 9   t(2578.66) = -3.73, p < .001
## 10  t(2787.71) = -7.48, p < .001
## 11     t(2585.66) = -2.91, p = 0
## 12  t(2734.41) = 17.79, p < .001
## 13  t(2502.97) = -1.17, p = 0.24
## 14  t(2590.92) = -9.06, p < .001
## 15    t(2613.8) = 6.31, p < .001
## 16   t(2780.55) = 5.56, p < .001
## 17  t(3288.98) = 10.48, p < .001
## 18      t(3242.15) = 3.26, p = 0
## 19  t(2231.9) = -14.48, p < .001
## 20  t(2629.18) = -6.02, p < .001
## 21   t(1519.64) = 0.15, p = 0.88
## 22  t(1473.15) = -4.31, p < .001
## 23    t(1424) = -21.85, p < .001
## 24   t(1440.9) = 15.28, p < .001
## 25  t(1455.52) = 11.46, p < .001
## 26 t(1327.42) = -25.52, p < .001
## 27   t(1393.13) = 0.04, p = 0.97
## 28  t(1973.63) = 13.72, p < .001
## 29  t(1384.38) = -3.46, p < .001
## 30  t(1447.36) = -6.49, p < .001
## 31    t(1395.22) = 1.4, p = 0.16
## 32  t(1422.67) = 13.99, p < .001
## 33   t(1418.73) = -7.1, p < .001
## 34  t(1384.29) = -7.39, p < .001
## 35   t(1395.74) = 4.34, p < .001
## 36    t(1449.5) = 5.26, p < .001
## 37   t(1751.58) = 11.4, p < .001
## 38    t(1419.81) = 0.26, p = 0.8
## 39  t(1269.9) = -11.03, p < .001
## 40  t(1392.49) = -0.64, p = 0.52
# Render the t-test table as a styled HTML table.
kable_styling(
  kable(ttests_mainabroad, caption = "<b>T-tests comparing PhDs who went abroad after their PhD and those who did not</b>"),
  bootstrap_options = c("striped", "hover", "condensed", "responsive")
)
T-tests comparing PhDs who went abroad after their PhD and those who did not
test_id t_value df p_value variable N_obs group psmall ttest
1 t1_m -0.5448288 2791.092 0.5859148 Real monthly pay 27082 men 0 t(2791.09) = -0.54, p = 0.59
2 t2_m -4.9180871 2782.432 0.0000009 Log real monthly pay 27082 men 1 t(2782.43) = -4.92, p < .001
3 t3_m -26.2868375 2731.112 0.0000000 Time 27082 men 1 t(2731.11) = -26.29, p < .001
4 t4_m 10.5875840 2745.622 0.0000000 PhD discipline: Health sciences 27082 men 1 t(2745.62) = 10.59, p < .001
5 t5_m 26.8809026 3393.241 0.0000000 PhD discipline: Social sciences 27082 men 1 t(3393.24) = 26.88, p < .001
6 t6_m -26.5915439 2536.902 0.0000000 PhD discipline: Natural sciences & mathematics 27082 men 1 t(2536.9) = -26.59, p < .001
7 t7_m 1.2331287 2626.082 0.2176381 PhD discipline: Engineering 27082 men 0 t(2626.08) = 1.23, p = 0.22
8 t8_m 6.4118734 2901.868 0.0000000 PhD discipline: Humanities 27082 men 1 t(2901.87) = 6.41, p < .001
9 t9_m -3.7290143 2578.664 0.0001964 Child under 5 27082 men 1 t(2578.66) = -3.73, p < .001
10 t10_m -7.4845591 2787.712 0.0000000 Log Monthly contract hours 27082 men 1 t(2787.71) = -7.48, p < .001
11 t11_m -2.9126910 2585.656 0.0036140 Temporary contract 27082 men 0 t(2585.66) = -2.91, p = 0
12 t12_m 17.7863868 2734.406 0.0000000 PhD cohort 27082 men 1 t(2734.41) = 17.79, p < .001
13 t13_m -1.1718224 2502.966 0.2413799 PhD satisfaction 27082 men 0 t(2502.97) = -1.17, p = 0.24
14 t14_m -9.0631295 2590.921 0.0000000 Sector: For-profit 27082 men 1 t(2590.92) = -9.06, p < .001
15 t15_m 6.3057470 2613.798 0.0000000 Sector: Government 27082 men 1 t(2613.8) = 6.31, p < .001
16 t16_m 5.5648839 2780.548 0.0000000 Sector:Non-profit 27082 men 1 t(2780.55) = 5.56, p < .001
17 t17_m 10.4789813 3288.980 0.0000000 Other job 27082 men 1 t(3288.98) = 10.48, p < .001
18 t18_m 3.2627717 3242.145 0.0011147 Break in Dutch employment 27082 men 0 t(3242.15) = 3.26, p = 0
19 t19_m -14.4831005 2231.901 0.0000000 Period abroad 27082 men 1 t(2231.9) = -14.48, p < .001
20 t20_m -6.0214089 2629.183 0.0000000 Partnered 27082 men 1 t(2629.18) = -6.02, p < .001
21 t1_v 0.1466530 1519.638 0.8834254 Real monthly pay 25161 women 0 t(1519.64) = 0.15, p = 0.88
22 t2_v -4.3127046 1473.152 0.0000172 Log real monthly pay 25161 women 1 t(1473.15) = -4.31, p < .001
23 t3_v -21.8505770 1424.003 0.0000000 Time 25161 women 1 t(1424) = -21.85, p < .001
24 t4_v 15.2758610 1440.897 0.0000000 PhD discipline: Health sciences 25161 women 1 t(1440.9) = 15.28, p < .001
25 t5_v 11.4640354 1455.524 0.0000000 PhD discipline: Social sciences 25161 women 1 t(1455.52) = 11.46, p < .001
26 t6_v -25.5158262 1327.420 0.0000000 PhD discipline: Natural sciences & mathematics 25161 women 1 t(1327.42) = -25.52, p < .001
27 t7_v 0.0391361 1393.127 0.9687875 PhD discipline: Engineering 25161 women 0 t(1393.13) = 0.04, p = 0.97
28 t8_v 13.7180715 1973.628 0.0000000 PhD discipline: Humanities 25161 women 1 t(1973.63) = 13.72, p < .001
29 t9_v -3.4631195 1384.384 0.0005502 Child under 5 25161 women 1 t(1384.38) = -3.46, p < .001
30 t10_v -6.4933021 1447.355 0.0000000 Log Monthly contract hours 25161 women 1 t(1447.36) = -6.49, p < .001
31 t11_v 1.4033976 1395.222 0.1607208 Temporary contract 25161 women 0 t(1395.22) = 1.4, p = 0.16
32 t12_v 13.9885935 1422.669 0.0000000 PhD cohort 25161 women 1 t(1422.67) = 13.99, p < .001
33 t13_v -7.1017921 1418.733 0.0000000 PhD satisfaction 25161 women 1 t(1418.73) = -7.1, p < .001
34 t14_v -7.3892582 1384.287 0.0000000 Sector: For-profit 25161 women 1 t(1384.29) = -7.39, p < .001
35 t15_v 4.3390855 1395.738 0.0000153 Sector: Government 25161 women 1 t(1395.74) = 4.34, p < .001
36 t16_v 5.2575672 1449.498 0.0000002 Sector:Non-profit 25161 women 1 t(1449.5) = 5.26, p < .001
37 t17_v 11.3991057 1751.582 0.0000000 Other job 25161 women 1 t(1751.58) = 11.4, p < .001
38 t18_v 0.2554370 1419.811 0.7984226 Break in Dutch employment 25161 women 0 t(1419.81) = 0.26, p = 0.8
39 t19_v -11.0304233 1269.902 0.0000000 Period abroad 25161 women 1 t(1269.9) = -11.03, p < .001
40 t20_v -0.6404989 1392.486 0.5219537 Partnered 25161 women 0 t(1392.49) = -0.64, p = 0.52

7 Other job

Are people with an additional job more or less likely to make a transition than people without one?

# Per person: the maximum of trans_lt over all of that person's rows
# (presumably 1 if the person ever transitions, assuming 0/1 coding -- confirm).
evertrans <- df_mmfc %>%
  group_by(RINPERSOON) %>%
  summarise(evertrans = max(trans_lt))

# Per person, using only rows where uni == 1: the maximum of otherjob and the
# maximum of t over those rows.
checks <- df_mmfc %>%
  filter(uni == 1) %>%
  group_by(RINPERSOON) %>%
  summarise(
    otherjob_m = max(otherjob),
    time_uni = max(t)
  )

# Keep every person from evertrans; persons without any uni == 1 row get NA
# for otherjob_m / time_uni.
evertrans_otherjob <- left_join(evertrans, checks, by = "RINPERSOON")

# Cross-tabulate ever-transition against other-job status, in long format.
# table() omits NA by default, so persons with a missing otherjob_m are not
# counted here.
trans_by_oj <- as.data.frame(
  with(
    evertrans_otherjob,
    table("Will Transition" = evertrans, "Other job" = otherjob_m)
  )
)

write.csv(trans_by_oj, file = "F:/GPE_salaris/R&R/transitions_otherjob.csv")



# Mean and SD of perc_pay_job1 and perc_hrs_job1 in df_other, split by uni and
# gender, plus pooled rows per uni labelled gender = "all".
#
# Count columns:
#   n_obs        - number of rows (observations) in the cell
#   n_individual - number of distinct persons (RINPERSOON) in the cell
# Previously the row count was called `n` and the distinct-person count was
# called `n_obs`, which mislabelled the person count as an observation count.

otherjob_summ_gender <- df_other %>%
  group_by(uni, gender) %>%
  summarise(
    mean_perc_pay_job1 = mean(perc_pay_job1),
    sd_perc_pay_job1 = sd(perc_pay_job1),
    mean_perc_hrs_job1 = mean(perc_hrs_job1),
    sd_perc_hrs_job1 = sd(perc_hrs_job1),
    n_obs = n(),
    n_individual = n_distinct(RINPERSOON),
    # drop the leftover `uni` grouping so no stale group metadata is carried
    # into the row-bind below
    .groups = "drop"
  ) %>%
  # gender is stacked with the literal "all" below; make the type explicit
  mutate(gender = as.character(gender))

otherjob_summ_all <- df_other %>%
  group_by(uni) %>%
  summarise(
    mean_perc_pay_job1 = mean(perc_pay_job1),
    sd_perc_pay_job1 = sd(perc_pay_job1),
    mean_perc_hrs_job1 = mean(perc_hrs_job1),
    sd_perc_hrs_job1 = sd(perc_hrs_job1),
    n_obs = n(),
    n_individual = n_distinct(RINPERSOON),
    .groups = "drop"
  ) %>%
  mutate(gender = "all")

# Columns are matched by name; by-gender rows come first, pooled rows last.
otherjob_summ <- bind_rows(otherjob_summ_gender, otherjob_summ_all)


write.csv(otherjob_summ, file = "F:/GPE_salaris/R&R/descriptive_tab_otherjob.csv")
# Read the exported cross-tab of transition status by other-job status.
transitions_otherjob <- read.csv(
  file = "transitions_otherjob.csv",
  header = TRUE,
  check.names = FALSE
)

# How often do people with (1) and without (0) another job make a transition?
transitions_otherjob
##   Will.Transition Other.job Freq
## 1               0         0 1783
## 2               1         0 2046
## 3               0         1  446
## 4               1         1  301
# For people with an extra job: which share of their earnings and work hours
# comes from their main job? Split by gender (1 = men; 2 = women) and by
# whether the main job is at the university (uni: 0/1).
Table1_otherjob <- read.csv(
  file = "Table1_otherjob.csv",
  header = TRUE,
  check.names = FALSE
)

# Print without the first column of the file.
Table1_otherjob[, -1]
##   uni gender mean_perc_pay_job1 sd_perc_pay_job1 mean_perc_hrs_job1
## 1   0      1          0.7475617        0.1505000          0.7064214
## 2   0      2          0.7269163        0.1398552          0.6990669
## 3   1      1          0.7341327        0.1438970          0.7156083
## 4   1      2          0.6970699        0.1412620          0.6792625
## 5   0    all          0.7369286        0.1454112          0.7026336
## 6   1    all          0.7152258        0.1437217          0.6970672
##   sd_perc_hrs_job1 n_obs n_individual
## 1        0.1612090   500          198
## 2        0.1450039   531          234
## 3        0.1550529   943          354
## 4        0.1479073   982          393
## 5        0.1530466  1031          432
## 6        0.1524972  1925          747
# Render the other-job summary as a styled HTML table.
kable_styling(
  kable(
    Table1_otherjob,
    caption = "<b>Percentage of earnings and work hours hauled from main job, for researchers with more than 1 job</b>"
  ),
  bootstrap_options = c("striped", "hover", "condensed", "responsive")
)
Percentage of earnings and work hours hauled from main job, for researchers with more than 1 job
uni gender mean_perc_pay_job1 sd_perc_pay_job1 mean_perc_hrs_job1 sd_perc_hrs_job1 n_obs n_individual
1 0 1 0.7475617 0.1505000 0.7064214 0.1612090 500 198
2 0 2 0.7269163 0.1398552 0.6990669 0.1450039 531 234
3 1 1 0.7341327 0.1438970 0.7156083 0.1550529 943 354
4 1 2 0.6970699 0.1412620 0.6792625 0.1479073 982 393
5 0 all 0.7369286 0.1454112 0.7026336 0.1530466 1031 432
6 1 all 0.7152258 0.1437217 0.6970672 0.1524972 1925 747



Copyright © 2025