Descriptives
Descriptive table by
gender
# flattening time-varying variables (basepay)
# Empty skeleton of the descriptives table: one row per variable (plus the
# "PhD Discipline" / "Sector" header rows and the two N rows) and, per gender
# (F = women, M = men), a 1st-percentile, 99th-percentile, mean and SD column.
columns <- c(
  "F: 1%", "F: 99%", "F: Mean", "F: SD",
  "M: 1%", "M: 99%", "M: Mean", "M: SD"
)
rows <- c(
  "Real monthly pay", "Log real monthly pay",
  "Transition year", "Transition experienced", "Time",
  "PhD Discipline", "Health sciences", "Social sciences",
  "Natural sciences and mathematics", "Engineering", "Humanities",
  "Child under 5", "Log monthly contract hours", "Temporary contract",
  "PhD cohort", "PhD satisfaction",
  "Sector", "For-profit", "Government", "Non-profit",
  "Other job", "Break in Dutch employment", "Period abroad", "Partnered",
  "Time at transition", "N individuals", "N observations"
)
# Every cell starts out as NA; cells are filled in by position further down.
t1 <- data.frame(matrix(NA, nrow = length(rows), ncol = length(columns)))
dimnames(t1) <- list(rows, columns)
# Per-gender 1st percentile, 99th percentile, mean and SD of the continuous
# variables. The result has one row per gender x variable and the columns
# gender, variable, p1, p99, mean, sd (in that order).
t1_contin <- df_mmfc %>%
  group_by(gender) %>%
  summarize(
    across(
      c(realpay_corr2, log_realpay, t, log_hrs, phd_coh, phd_sat, break_job, abroad_time),
      list(
        p1 = function(x) quantile(x, 0.01),
        p99 = function(x) quantile(x, 0.99),
        mean = mean,
        sd = sd
      ),
      .names = "{.col}-{.fn}"
    )
  ) %>%
  # "<variable>-<statistic>" columns -> long format -> one column per statistic
  pivot_longer(
    cols = -gender,
    names_to = c("variable", "statistic"),
    names_sep = "-"
  ) %>%
  pivot_wider(names_from = statistic, values_from = value)

# Gender-specific subsets used to fill the remaining rows of the table.
df_wom <- filter(df_mmfc, gender == "women")
df_men <- filter(df_mmfc, gender == "men")
# check if 1% and 99% of dummies is indeed 0/1 (run interactively):
# dummy_vars <- c("trans_lt", "trans_st", "child_u5", "temporary_emp",
#                 "sector_forpr", "sector_gov", "sector_nonpr", "otherjob",
#                 "partnered")
# for (v in dummy_vars) print(c(quantile(df_men[[v]], 0.01), quantile(df_men[[v]], 0.99)))
# for (v in dummy_vars) print(c(quantile(df_wom[[v]], 0.01), quantile(df_wom[[v]], 0.99)))
# it is!

# Rows of t1 holding dummy variables (transition dummies, the five PhD
# disciplines, child under 5, temporary contract, the sector dummies,
# other job, partnered). Their 1% / 99% cells get the placeholder values
# 0.0001 / 1.0001, which become 0 and 1 when the table is rounded to two
# decimals later in the script.
dummy_rows <- c(3, 4, 7:12, 14, 18:21, 24)
t1[dummy_rows, c(1, 5)] <- 0.0001 # F: 1%,  M: 1%
t1[dummy_rows, c(2, 6)] <- 1.0001 # F: 99%, M: 99%
# time at transition: observations in which trans_st == 1, per gender
womtrans <- filter(df_wom, trans_st == 1)
mentrans <- filter(df_men, trans_st == 1)

# Fill the four columns (1%, 99%, Mean, SD) of one gender in the table.
#   tab         table skeleton to fill; rows are addressed by position
#   contin      per-gender summary with p1 / p99 / mean / sd in columns 3:6
#   contin_rows the eight rows of `contin` for this gender, ordered as pay,
#               log pay, time, contract hours, PhD cohort, PhD satisfaction,
#               break job, abroad
#   dat         all observations of this gender
#   trans       this gender's observations with trans_st == 1
#   first_col   column of `tab` holding this gender's 1% statistic
# Returns the filled table.
fill_gender_cols <- function(tab, contin, contin_rows, dat, trans, first_col) {
  stat_cols <- first_col + 0:3
  mean_col <- first_col + 2

  # continuous variables: pay (1:2), time (5), contract hours (13),
  # phd cohort / satis (15:16), break job / abroad (22:23)
  tab[c(1, 2, 5, 13, 15, 16, 22, 23), stat_cols] <- contin[contin_rows, 3:6]

  # dummies: only the mean (= share of observations) is computed here
  dummy_mean_rows <- c(3, 4, 12, 14, 18, 19, 20, 21, 24)
  dummy_mean_vars <- c(
    "trans_st", "trans_lt", "child_u5", "temporary_emp",
    "sector_forpr", "sector_gov", "sector_nonpr", "otherjob", "partnered"
  )
  tab[dummy_mean_rows, mean_col] <- vapply(
    dummy_mean_vars,
    function(v) mean(dat[[v]]),
    numeric(1)
  )

  # disci: share of observations in each of the five disciplines
  tab[7:11, mean_col] <- round(prop.table(table(dat$phd_disci)), digits = 2)[1:5]

  # time at transition
  tab[25, first_col] <- quantile(trans$t, 0.01)
  tab[25, first_col + 1] <- quantile(trans$t, 0.99)
  tab[25, mean_col] <- mean(trans$t)
  tab[25, first_col + 3] <- sd(trans$t)

  # N individuals (distinct RINPERSOON) and N observations (rows)
  tab[26, mean_col] <- nrow(dat[!duplicated(dat$RINPERSOON), ])
  tab[27, mean_col] <- nrow(dat)

  tab
}

# women: rows 9:16 of t1_contin, table columns 1:4
t1 <- fill_gender_cols(t1, t1_contin, 9:16, df_wom, womtrans, 1)
# men: rows 1:8 of t1_contin, table columns 5:8
t1 <- fill_gender_cols(t1, t1_contin, 1:8, df_men, mentrans, 5)
# Statistics are rounded to two decimals; the two count rows (N individuals,
# N observations) are kept as non-negative whole numbers.
t1[1:25, ] <- round(t1[1:25, ], digits = 2)
t1[26:27, ] <- round(abs(t1[26:27, ]), digits = 0)

# Cells that were never filled are shown as blanks (this turns the columns
# into character vectors).
t1[is.na(t1)] <- ""

Descriptives_Table <- t1
write.csv(t1, file = "F:/GPE_salaris/R&R/descriptive_tab.csv")
# Render Table 1 from Table1.csv in the working directory.
# NOTE(review): this is a different file than the one written by write.csv()
# just above; presumably an exported copy of it -- confirm.
Table1 <- read.csv("Table1.csv", header = TRUE, check.names = FALSE)
Table1[is.na(Table1)] <- ""
kable_styling(
  kable(Table1, caption = "<b>Table 1: descriptive statistics for all variables used in our analyses, split out for men and women</b>"),
  bootstrap_options = c("striped", "hover", "condensed", "responsive")
)
Table 1: descriptive statistics for all variables used in our
analyses, split out for men and women
|
|
F: 1%
|
F: 99%
|
F: Mean
|
F: SD
|
M: 1%
|
M: 99%
|
M: Mean
|
M: SD
|
|
Real monthly pay
|
1085.08
|
15324.76
|
4867.1
|
2776.15
|
1441.4
|
17133.96
|
5529.32
|
3187.54
|
|
Log real monthly pay
|
6.99
|
9.64
|
8.36
|
0.52
|
7.27
|
9.75
|
8.48
|
0.51
|
|
Transition year
|
0
|
1
|
0.05
|
|
0
|
1
|
0.05
|
|
|
Transition experienced
|
0
|
1
|
0.32
|
|
0
|
1
|
0.35
|
|
|
Time
|
0
|
16
|
5.91
|
4.26
|
0
|
16
|
6.25
|
4.39
|
|
PhD Discipline
|
|
|
|
|
|
|
|
|
|
Health sciences
|
0
|
1
|
0.43
|
|
0
|
1
|
0.27
|
|
|
Social sciences
|
0
|
1
|
0.31
|
|
0
|
1
|
0.23
|
|
|
Natural sciences and mathematics
|
0
|
1
|
0.16
|
|
0
|
1
|
0.29
|
|
|
Engineering
|
0
|
1
|
0.05
|
|
0
|
1
|
0.14
|
|
|
Humanities
|
0
|
1
|
0.06
|
|
0
|
1
|
0.06
|
|
|
Child under 5
|
0
|
1
|
0.33
|
|
0
|
1
|
0.32
|
|
|
Log monthly contract hours
|
3.56
|
5.29
|
4.93
|
0.29
|
4.13
|
5.29
|
5.02
|
0.22
|
|
Temporary contract
|
0
|
1
|
0.39
|
|
0
|
1
|
0.34
|
|
|
PhD cohort
|
0
|
12
|
4.51
|
3.11
|
0
|
11
|
3.9
|
2.93
|
|
PhD satisfaction
|
2.12
|
4
|
3.14
|
0.38
|
2.25
|
4
|
3.22
|
0.37
|
|
Sector
|
|
|
|
|
|
|
|
|
|
For-profit
|
0
|
1
|
0.38
|
|
0
|
1
|
0.42
|
|
|
Government
|
0
|
1
|
0.48
|
|
0
|
1
|
0.48
|
|
|
Non-profit
|
0
|
1
|
0.14
|
|
0
|
1
|
0.1
|
|
|
Other job
|
0
|
1
|
0.06
|
|
0
|
1
|
0.06
|
|
|
Break in Dutch employment
|
0
|
16.99
|
0.9
|
4.83
|
0
|
16.46
|
0.84
|
4.78
|
|
Period abroad
|
0
|
0
|
0.43
|
6.52
|
0
|
24.31
|
0.78
|
7.97
|
|
Partnered
|
0
|
1
|
0.5
|
|
0
|
1
|
0.58
|
|
|
Time at transition
|
1
|
14
|
3.97
|
3.13
|
1
|
13.54
|
3.59
|
3.04
|
|
N individuals
|
|
|
2298
|
|
|
|
2278
|
|
|
N observations
|
|
|
23903
|
|
|
|
24883
|
|
Robustness: descriptive
statistics of people who go abroad after their PhD
# Load the sample of people who went abroad after their PhD.
load(file = "H:/processed_data/abroadafterphd_long.rda")

# Inspect the discipline labels, then fix the order of the factor levels.
levels(as.factor(abroadafterphd_long$phd_disci))
abroadafterphd_long$phd_disci <- factor(
  abroadafterphd_long$phd_disci,
  levels = c(
    "Health sciences", "Social sciences", "Natural sciences and mathematics",
    "Engineering", "Humanities"
  )
)

# Recode gender (1 = men, anything else = women) into a factor, men first.
abroadafterphd_long <- abroadafterphd_long %>%
  mutate(
    gender = factor(ifelse(gender == 1, "men", "women"), levels = c("men", "women"))
  )

# removing missings
sum(!duplicated(abroadafterphd_long$RINPERSOON)) # 353
abroadafterphd_long <- abroadafterphd_long %>%
  filter(!is.na(phd_sat), !is.na(temporary_emp), !is.na(sect_adj))
sum(!duplicated(abroadafterphd_long$RINPERSOON)) # 352
# flattening time-varying variables (basepay)
# Empty skeleton of the descriptives table for the abroad-after-PhD sample:
# one row per variable and, per gender (F = women, M = men), a
# 1st-percentile, 99th-percentile, mean and SD column.
columns <- c(
  "F: 1%", "F: 99%", "F: Mean", "F: SD",
  "M: 1%", "M: 99%", "M: Mean", "M: SD"
)
rows <- c(
  "Real monthly pay", "Log real monthly pay",
  "Transition year", "Transition experienced", "Time",
  "PhD Discipline", "Health sciences", "Social sciences",
  "Natural sciences and mathematics", "Engineering", "Humanities",
  "Child under 5", "Log monthly contract hours", "Temporary contract",
  "PhD cohort", "PhD satisfaction",
  "Sector", "For-profit", "Government", "Non-profit",
  "Other job", "Break in Dutch employment", "Period abroad", "Partnered",
  "Time at transition", "N individuals", "N observations"
)
# Every cell starts out as NA; cells are filled in by position further down.
t1_abroad <- data.frame(matrix(NA, nrow = length(rows), ncol = length(columns)))
dimnames(t1_abroad) <- list(rows, columns)
# Per-gender 1st percentile, 99th percentile, mean and SD of the continuous
# variables in the abroad-after-PhD sample. The result has one row per
# gender x variable and the columns gender, variable, p1, p99, mean, sd.
#
# Contract hours are summarised via log_hrs (not basehours_month): the table
# row this feeds is labelled "Log monthly contract hours", and both the main
# descriptives table and the t-tests on this same data use log_hrs.
t1_abroad_contin <- abroadafterphd_long %>%
  group_by(gender) %>%
  summarize(
    across(
      c(realpay_corr2, log_realpay, t, log_hrs, phd_coh, phd_sat, break_job, abroad_time),
      list(
        p1 = ~ quantile(.x, 0.01),
        p99 = ~ quantile(.x, 0.99),
        mean = mean,
        sd = sd
      ),
      .names = "{.col}-{.fn}"
    )
  ) %>%
  # "<variable>-<statistic>" columns -> long format -> one column per statistic
  pivot_longer(
    cols = -gender,
    names_to = c("variable", "statistic"),
    names_sep = "-"
  ) %>%
  pivot_wider(names_from = statistic, values_from = value)

# Gender-specific subsets of the abroad sample (these overwrite the df_wom /
# df_men objects built from the main data earlier in the script).
df_wom <- abroadafterphd_long %>% filter(gender == "women")
df_men <- abroadafterphd_long %>% filter(gender == "men")
# check if 1% and 99% of dummies is indeed 0/1 (run interactively):
# dummy_vars <- c("trans_lt", "trans_st", "child_u5", "temporary_emp",
#                 "sector_forpr", "sector_gov", "sector_nonpr", "otherjob",
#                 "partnered")
# for (v in dummy_vars) print(c(quantile(df_men[[v]], 0.01), quantile(df_men[[v]], 0.99)))
# for (v in dummy_vars) print(c(quantile(df_wom[[v]], 0.01), quantile(df_wom[[v]], 0.99)))
# it is!

# Rows of t1_abroad holding dummy variables (transition dummies, the five PhD
# disciplines, child under 5, temporary contract, the sector dummies,
# other job, partnered). Their 1% / 99% cells get the placeholder values
# 0.0001 / 1.0001, which become 0 and 1 when the table is rounded to two
# decimals later in the script.
dummy_rows <- c(3, 4, 7:12, 14, 18:21, 24)
t1_abroad[dummy_rows, c(1, 5)] <- 0.0001 # F: 1%,  M: 1%
t1_abroad[dummy_rows, c(2, 6)] <- 1.0001 # F: 99%, M: 99%
# time at transition: observations in which trans_st == 1, per gender
womtrans <- filter(df_wom, trans_st == 1)
mentrans <- filter(df_men, trans_st == 1)

# Fill the four columns (1%, 99%, Mean, SD) of one gender in the table.
#   tab         table skeleton to fill; rows are addressed by position
#   contin      per-gender summary with p1 / p99 / mean / sd in columns 3:6
#   contin_rows the eight rows of `contin` for this gender, ordered as pay,
#               log pay, time, contract hours, PhD cohort, PhD satisfaction,
#               break job, abroad
#   dat         all observations of this gender
#   trans       this gender's observations with trans_st == 1
#   first_col   column of `tab` holding this gender's 1% statistic
# Returns the filled table.
fill_gender_cols <- function(tab, contin, contin_rows, dat, trans, first_col) {
  stat_cols <- first_col + 0:3
  mean_col <- first_col + 2

  # continuous variables: pay (1:2), time (5), contract hours (13),
  # phd cohort / satis (15:16), break job / abroad (22:23)
  tab[c(1, 2, 5, 13, 15, 16, 22, 23), stat_cols] <- contin[contin_rows, 3:6]

  # dummies: only the mean (= share of observations) is computed here
  dummy_mean_rows <- c(3, 4, 12, 14, 18, 19, 20, 21, 24)
  dummy_mean_vars <- c(
    "trans_st", "trans_lt", "child_u5", "temporary_emp",
    "sector_forpr", "sector_gov", "sector_nonpr", "otherjob", "partnered"
  )
  tab[dummy_mean_rows, mean_col] <- vapply(
    dummy_mean_vars,
    function(v) mean(dat[[v]]),
    numeric(1)
  )

  # disci: share of observations in each of the five disciplines
  tab[7:11, mean_col] <- round(prop.table(table(dat$phd_disci)), digits = 2)[1:5]

  # time at transition
  tab[25, first_col] <- quantile(trans$t, 0.01)
  tab[25, first_col + 1] <- quantile(trans$t, 0.99)
  tab[25, mean_col] <- mean(trans$t)
  tab[25, first_col + 3] <- sd(trans$t)

  # N individuals (distinct RINPERSOON) and N observations (rows)
  tab[26, mean_col] <- nrow(dat[!duplicated(dat$RINPERSOON), ])
  tab[27, mean_col] <- nrow(dat)

  tab
}

# women: rows 9:16 of t1_abroad_contin, table columns 1:4
t1_abroad <- fill_gender_cols(t1_abroad, t1_abroad_contin, 9:16, df_wom, womtrans, 1)
# men: rows 1:8 of t1_abroad_contin, table columns 5:8
t1_abroad <- fill_gender_cols(t1_abroad, t1_abroad_contin, 1:8, df_men, mentrans, 5)
# Statistics are rounded to two decimals; the two count rows (N individuals,
# N observations) are kept as non-negative whole numbers.
t1_abroad[1:25, ] <- round(t1_abroad[1:25, ], digits = 2)
t1_abroad[26:27, ] <- round(abs(t1_abroad[26:27, ]), digits = 0)

# Cells that were never filled are shown as blanks.
t1_abroad[is.na(t1_abroad)] <- ""

# Drop rows 3, 24 and 25 ("Transition year", "Partnered", "Time at
# transition"): the transition rows don't make sense for this sample.
# NOTE(review): "Partnered" (row 24) is dropped as well -- confirm that this
# is intended. Only the Mean and SD columns are kept: no min/max due to
# sample size restrictions.
t1_abroad <- t1_abroad[c(1:2, 4:23, 26:27), c(3, 4, 7, 8)]

# NOTE: this replaces the Descriptives_Table object created for the main
# sample earlier in the script.
Descriptives_Table <- t1_abroad
write.csv(t1_abroad, file = "F:/GPE_salaris/R&R/descriptive_tab_abroadphd.csv")
# Render the abroad-after-PhD descriptives from Table1_abroadphd.csv in the
# working directory.
# NOTE(review): this is a different file than the one written by write.csv()
# just above; presumably an exported copy of it -- confirm.
Table1_abroadphd <- read.csv("Table1_abroadphd.csv", header = TRUE, check.names = FALSE)
Table1_abroadphd[is.na(Table1_abroadphd)] <- ""
kable_styling(
  kable(Table1_abroadphd, caption = "<b>Table: descriptive statistics for PhDs who went abroad after their PhD</b>"),
  bootstrap_options = c("striped", "hover", "condensed", "responsive")
)
Table: descriptive statistics for PhDs who went abroad after their
PhD
|
|
F: Mean
|
F: SD
|
M: Mean
|
M: SD
|
|
Real monthly pay
|
4858.36
|
2015.94
|
5562.04
|
2651.9
|
|
Log real monthly pay
|
8.41
|
0.41
|
8.53
|
0.43
|
|
Transition experienced
|
0.58
|
|
0.55
|
|
|
Time
|
8.36
|
3.85
|
8.52
|
3.84
|
|
PhD Discipline
|
|
|
|
|
|
Health sciences
|
0.24
|
|
0.18
|
|
|
Social sciences
|
0.18
|
|
0.07
|
|
|
Natural sciences and mathematics
|
0.52
|
|
0.58
|
|
|
Engineering
|
0.05
|
|
0.13
|
|
|
Humanities
|
0.01
|
|
0.04
|
|
|
Child under 5
|
0.38
|
|
0.36
|
|
|
Log monthly contract hours
|
4.97
|
0.24
|
5.05
|
0.19
|
|
Temporary contract
|
0.37
|
|
0.37
|
|
|
PhD cohort
|
3.36
|
2.82
|
2.87
|
2.55
|
|
PhD satisfaction
|
3.21
|
0.35
|
3.23
|
0.42
|
|
Sector
|
|
|
|
|
|
For-profit
|
0.48
|
|
0.52
|
|
|
Government
|
0.42
|
|
0.41
|
|
|
Non-profit
|
0.09
|
|
0.07
|
|
|
Other job
|
0.02
|
|
0.02
|
|
|
Break in Dutch employment
|
0.87
|
4.41
|
0.61
|
3.05
|
|
Period abroad
|
6.95
|
20.91
|
9.16
|
27.04
|
|
N individuals
|
130
|
|
222
|
|
|
N observations
|
1258
|
|
2199
|
|
T-test table: are people who go abroad after their PhD different from
those who do not?
# chi-square gender distri: does the share of men / women, counting every
# person (RINPERSOON) once, differ between the main data and the
# abroad-after-PhD sample?
gender_reg <- table(df_mmfc$gender[!duplicated(df_mmfc$RINPERSOON)])
gender_abr <- table(abroadafterphd_long$gender[!duplicated(abroadafterphd_long$RINPERSOON)])

# 2 x 2 table of counts: one row per sample, one column per gender.
# The column labels assume both tables list men first, then women.
gender_distri <- rbind.data.frame(gender_reg, gender_abr)
dimnames(gender_distri) <- list(
  c("Main data", "Abroad after PhD"),
  c("Men", "Women")
)

chisq_gender <- chisq.test(gender_distri)

# Store the test result next to the counts (repeated on both rows).
gender_distri$chisq <- chisq_gender[["statistic"]]
gender_distri$chisq_df <- chisq_gender[["parameter"]]
gender_distri$chisq_p <- chisq_gender[["p.value"]]
write.csv(gender_distri, file = "F:/GPE_salaris/R&R/gender_distri_abroad.csv")
# Add one 0/1 indicator per PhD discipline (NA where phd_disci is NA), so the
# disciplines can be compared with t-tests like the other variables.
add_disci_dummies <- function(dat) {
  mutate(
    dat,
    disci_health = as.numeric(phd_disci == "Health sciences"),
    disci_social = as.numeric(phd_disci == "Social sciences"),
    disci_natural = as.numeric(phd_disci == "Natural sciences and mathematics"),
    disci_engineering = as.numeric(phd_disci == "Engineering"),
    disci_humanities = as.numeric(phd_disci == "Humanities")
  )
}
df_mmfc <- add_disci_dummies(df_mmfc)
abroadafterphd_long <- add_disci_dummies(abroadafterphd_long)

# Gender-specific subsets: main data (df_wom / df_men) and the
# abroad-after-PhD sample (suffix _a).
df_wom <- filter(df_mmfc, gender == "women")
df_men <- filter(df_mmfc, gender == "men")
df_wom_a <- filter(abroadafterphd_long, gender == "women")
df_men_a <- filter(abroadafterphd_long, gender == "men")
# Two-sample t-tests (t.test() defaults) comparing the main data with the
# abroad-after-PhD sample, separately for women (objects t1_v ... t20_v) and
# men (t1_m ... t20_m). The position in `ttest_vars` is the test number.
# NOTE(review): the tests are run on all rows of the long data, so people
# with several observations enter more than once -- confirm this is intended.
ttest_vars <- c(
  "realpay_corr2", "log_realpay", "t",
  "disci_health", "disci_social", "disci_natural",
  "disci_engineering", "disci_humanities",
  "child_u5", "log_hrs", "temporary_emp", "phd_coh", "phd_sat",
  "sector_forpr", "sector_gov", "sector_nonpr", "otherjob",
  "break_job", "abroad_time", "partnered"
)

# Run one test on column `v` of both samples; data.name is set to the label
# t.test() would have produced for a call written out with `$`.
compare_samples <- function(main, abroad, main_name, abroad_name, v) {
  res <- t.test(main[[v]], abroad[[v]])
  res$data.name <- paste0(main_name, "$", v, " and ", abroad_name, "$", v)
  res
}

for (i in seq_along(ttest_vars)) {
  v <- ttest_vars[i]
  assign(paste0("t", i, "_v"), compare_samples(df_wom, df_wom_a, "df_wom", "df_wom_a", v))
  assign(paste0("t", i, "_m"), compare_samples(df_men, df_men_a, "df_men", "df_men_a", v))
}
# Pull the test statistic, degrees of freedom and p-value out of a t.test()
# result and return them as a one-row tibble (columns t_value, df, p_value).
extract_t <- function(ttest) {
  t_stat <- unname(ttest[["statistic"]])
  deg_free <- unname(ttest[["parameter"]])
  p_val <- unname(ttest[["p.value"]])
  tibble(t_value = t_stat, df = deg_free, p_value = p_val)
}
# Collect the 20 test objects per gender by name (t1_v ... t20_v for women,
# t1_m ... t20_m for men).
test_ids <- paste0("t", 1:20)
test_wom <- mget(paste0(test_ids, "_v"))
test_men <- mget(paste0(test_ids, "_m"))

# Display labels, in the same order as the tests.
varnames <- c(
  "Real monthly pay", "Log real monthly pay", "Time",
  "PhD discipline: Health sciences", "PhD discipline: Social sciences",
  "PhD discipline: Natural sciences & mathematics",
  "PhD discipline: Engineering", "PhD discipline: Humanities",
  "Child under 5", "Log Monthly contract hours", "Temporary contract",
  "PhD cohort", "PhD satisfaction",
  "Sector: For-profit", "Sector: Government", "Sector:Non-profit",
  "Other job", "Break in Dutch employment", "Period abroad", "Partnered"
)

# One row per test: id, t / df / p, variable label, the combined number of
# rows of the two samples compared, and the gender group.
tabulate_tests <- function(tests, n_obs, group_label) {
  stats <- bind_rows(lapply(tests, extract_t), .id = "test_id")
  mutate(stats, variable = varnames, N_obs = n_obs, group = group_label)
}

test_df_wom <- tabulate_tests(test_wom, nrow(df_wom) + nrow(df_wom_a), "women")
test_df_men <- tabulate_tests(test_men, nrow(df_men) + nrow(df_men_a), "men")
test_df_all <- bind_rows(test_df_men, test_df_wom)
# Flag p-values below .001; these are reported as "p < .001".
test_df_all$psmall <- ifelse(test_df_all$p_value < .001, 1, 0)

# Build the report string "t(df) = t, p ...".
# All numbers use a fixed number of decimals. The p-value gets three, so that
# values such as 0.0036 are shown as "p = 0.004" rather than being rounded
# down to "p = 0", and t / df keep their trailing zeros ("t(2536.90)").
p_text <- ifelse(
  test_df_all$psmall == 1,
  "< .001",
  paste0("= ", sprintf("%.3f", test_df_all$p_value))
)
test_df_all$ttest <- sprintf(
  "t(%.2f) = %.2f, p %s",
  test_df_all$df, test_df_all$t_value, p_text
)
write.csv(test_df_all, file = "F:/GPE_salaris/R&R/ttests_main_abroad.csv")

# Read the table back from ttests_main_abroad.csv in the working directory
# (NOTE(review): not the path written above; presumably an exported copy --
# confirm) and print it without the row-index column.
ttests_mainabroad <- read.csv(file = "ttests_main_abroad.csv", header = TRUE, check.names = FALSE)
ttests_mainabroad[, -1]
## test_id t_value df p_value
## 1 t1_m -0.54482884 2791.092 5.859148e-01
## 2 t2_m -4.91808711 2782.432 9.247609e-07
## 3 t3_m -26.28683748 2731.112 5.864767e-136
## 4 t4_m 10.58758396 2745.622 1.057409e-25
## 5 t5_m 26.88090259 3393.241 1.843293e-144
## 6 t6_m -26.59154389 2536.902 1.219750e-137
## 7 t7_m 1.23312866 2626.082 2.176381e-01
## 8 t8_m 6.41187345 2901.868 1.672075e-10
## 9 t9_m -3.72901434 2578.664 1.963702e-04
## 10 t10_m -7.48455912 2787.712 9.570685e-14
## 11 t11_m -2.91269103 2585.656 3.614001e-03
## 12 t12_m 17.78638679 2734.406 4.679438e-67
## 13 t13_m -1.17182236 2502.966 2.413799e-01
## 14 t14_m -9.06312947 2590.921 2.435695e-19
## 15 t15_m 6.30574697 2613.798 3.356219e-10
## 16 t16_m 5.56488385 2780.548 2.873351e-08
## 17 t17_m 10.47898127 3288.980 2.688617e-25
## 18 t18_m 3.26277165 3242.145 1.114724e-03
## 19 t19_m -14.48310051 2231.901 1.677765e-45
## 20 t20_m -6.02140886 2629.183 1.970343e-09
## 21 t1_v 0.14665298 1519.638 8.834254e-01
## 22 t2_v -4.31270458 1473.152 1.719924e-05
## 23 t3_v -21.85057698 1424.003 1.641672e-91
## 24 t4_v 15.27586101 1440.897 6.090731e-49
## 25 t5_v 11.46403537 1455.524 3.431097e-29
## 26 t6_v -25.51582615 1327.420 3.507584e-117
## 27 t7_v 0.03913606 1393.127 9.687875e-01
## 28 t8_v 13.71807152 1973.628 5.632661e-41
## 29 t9_v -3.46311953 1384.384 5.502304e-04
## 30 t10_v -6.49330210 1447.355 1.151181e-10
## 31 t11_v 1.40339762 1395.222 1.607208e-01
## 32 t12_v 13.98859348 1422.669 9.322857e-42
## 33 t13_v -7.10179205 1418.733 1.941450e-12
## 34 t14_v -7.38925817 1384.287 2.543458e-13
## 35 t15_v 4.33908548 1395.738 1.533749e-05
## 36 t16_v 5.25756723 1449.498 1.678162e-07
## 37 t17_v 11.39910565 1751.582 4.353029e-29
## 38 t18_v 0.25543702 1419.811 7.984226e-01
## 39 t19_v -11.03042334 1269.902 4.420890e-27
## 40 t20_v -0.64049890 1392.486 5.219537e-01
## variable N_obs group psmall
## 1 Real monthly pay 27082 men 0
## 2 Log real monthly pay 27082 men 1
## 3 Time 27082 men 1
## 4 PhD discipline: Health sciences 27082 men 1
## 5 PhD discipline: Social sciences 27082 men 1
## 6 PhD discipline: Natural sciences & mathematics 27082 men 1
## 7 PhD discipline: Engineering 27082 men 0
## 8 PhD discipline: Humanities 27082 men 1
## 9 Child under 5 27082 men 1
## 10 Log Monthly contract hours 27082 men 1
## 11 Temporary contract 27082 men 0
## 12 PhD cohort 27082 men 1
## 13 PhD satisfaction 27082 men 0
## 14 Sector: For-profit 27082 men 1
## 15 Sector: Government 27082 men 1
## 16 Sector:Non-profit 27082 men 1
## 17 Other job 27082 men 1
## 18 Break in Dutch employment 27082 men 0
## 19 Period abroad 27082 men 1
## 20 Partnered 27082 men 1
## 21 Real monthly pay 25161 women 0
## 22 Log real monthly pay 25161 women 1
## 23 Time 25161 women 1
## 24 PhD discipline: Health sciences 25161 women 1
## 25 PhD discipline: Social sciences 25161 women 1
## 26 PhD discipline: Natural sciences & mathematics 25161 women 1
## 27 PhD discipline: Engineering 25161 women 0
## 28 PhD discipline: Humanities 25161 women 1
## 29 Child under 5 25161 women 1
## 30 Log Monthly contract hours 25161 women 1
## 31 Temporary contract 25161 women 0
## 32 PhD cohort 25161 women 1
## 33 PhD satisfaction 25161 women 1
## 34 Sector: For-profit 25161 women 1
## 35 Sector: Government 25161 women 1
## 36 Sector:Non-profit 25161 women 1
## 37 Other job 25161 women 1
## 38 Break in Dutch employment 25161 women 0
## 39 Period abroad 25161 women 1
## 40 Partnered 25161 women 0
## ttest
## 1 t(2791.09) = -0.54, p = 0.59
## 2 t(2782.43) = -4.92, p < .001
## 3 t(2731.11) = -26.29, p < .001
## 4 t(2745.62) = 10.59, p < .001
## 5 t(3393.24) = 26.88, p < .001
## 6 t(2536.9) = -26.59, p < .001
## 7 t(2626.08) = 1.23, p = 0.22
## 8 t(2901.87) = 6.41, p < .001
## 9 t(2578.66) = -3.73, p < .001
## 10 t(2787.71) = -7.48, p < .001
## 11 t(2585.66) = -2.91, p = 0
## 12 t(2734.41) = 17.79, p < .001
## 13 t(2502.97) = -1.17, p = 0.24
## 14 t(2590.92) = -9.06, p < .001
## 15 t(2613.8) = 6.31, p < .001
## 16 t(2780.55) = 5.56, p < .001
## 17 t(3288.98) = 10.48, p < .001
## 18 t(3242.15) = 3.26, p = 0
## 19 t(2231.9) = -14.48, p < .001
## 20 t(2629.18) = -6.02, p < .001
## 21 t(1519.64) = 0.15, p = 0.88
## 22 t(1473.15) = -4.31, p < .001
## 23 t(1424) = -21.85, p < .001
## 24 t(1440.9) = 15.28, p < .001
## 25 t(1455.52) = 11.46, p < .001
## 26 t(1327.42) = -25.52, p < .001
## 27 t(1393.13) = 0.04, p = 0.97
## 28 t(1973.63) = 13.72, p < .001
## 29 t(1384.38) = -3.46, p < .001
## 30 t(1447.36) = -6.49, p < .001
## 31 t(1395.22) = 1.4, p = 0.16
## 32 t(1422.67) = 13.99, p < .001
## 33 t(1418.73) = -7.1, p < .001
## 34 t(1384.29) = -7.39, p < .001
## 35 t(1395.74) = 4.34, p < .001
## 36 t(1449.5) = 5.26, p < .001
## 37 t(1751.58) = 11.4, p < .001
## 38 t(1419.81) = 0.26, p = 0.8
## 39 t(1269.9) = -11.03, p < .001
## 40 t(1392.49) = -0.64, p = 0.52
# Render the t-test table (all columns, including the row-index column read
# from the CSV) as a styled HTML table.
kable_styling(
  kable(ttests_mainabroad, caption = "<b>T-tests comparing PhDs who went abroad after their PhD and those who did not</b>"),
  bootstrap_options = c("striped", "hover", "condensed", "responsive")
)
T-tests comparing PhDs who went abroad after their PhD and those who
did not
|
|
test_id
|
t_value
|
df
|
p_value
|
variable
|
N_obs
|
group
|
psmall
|
ttest
|
|
1
|
t1_m
|
-0.5448288
|
2791.092
|
0.5859148
|
Real monthly pay
|
27082
|
men
|
0
|
t(2791.09) = -0.54, p = 0.59
|
|
2
|
t2_m
|
-4.9180871
|
2782.432
|
0.0000009
|
Log real monthly pay
|
27082
|
men
|
1
|
t(2782.43) = -4.92, p < .001
|
|
3
|
t3_m
|
-26.2868375
|
2731.112
|
0.0000000
|
Time
|
27082
|
men
|
1
|
t(2731.11) = -26.29, p < .001
|
|
4
|
t4_m
|
10.5875840
|
2745.622
|
0.0000000
|
PhD discipline: Health sciences
|
27082
|
men
|
1
|
t(2745.62) = 10.59, p < .001
|
|
5
|
t5_m
|
26.8809026
|
3393.241
|
0.0000000
|
PhD discipline: Social sciences
|
27082
|
men
|
1
|
t(3393.24) = 26.88, p < .001
|
|
6
|
t6_m
|
-26.5915439
|
2536.902
|
0.0000000
|
PhD discipline: Natural sciences & mathematics
|
27082
|
men
|
1
|
t(2536.9) = -26.59, p < .001
|
|
7
|
t7_m
|
1.2331287
|
2626.082
|
0.2176381
|
PhD discipline: Engineering
|
27082
|
men
|
0
|
t(2626.08) = 1.23, p = 0.22
|
|
8
|
t8_m
|
6.4118734
|
2901.868
|
0.0000000
|
PhD discipline: Humanities
|
27082
|
men
|
1
|
t(2901.87) = 6.41, p < .001
|
|
9
|
t9_m
|
-3.7290143
|
2578.664
|
0.0001964
|
Child under 5
|
27082
|
men
|
1
|
t(2578.66) = -3.73, p < .001
|
|
10
|
t10_m
|
-7.4845591
|
2787.712
|
0.0000000
|
Log Monthly contract hours
|
27082
|
men
|
1
|
t(2787.71) = -7.48, p < .001
|
|
11
|
t11_m
|
-2.9126910
|
2585.656
|
0.0036140
|
Temporary contract
|
27082
|
men
|
0
|
t(2585.66) = -2.91, p = 0
|
|
12
|
t12_m
|
17.7863868
|
2734.406
|
0.0000000
|
PhD cohort
|
27082
|
men
|
1
|
t(2734.41) = 17.79, p < .001
|
|
13
|
t13_m
|
-1.1718224
|
2502.966
|
0.2413799
|
PhD satisfaction
|
27082
|
men
|
0
|
t(2502.97) = -1.17, p = 0.24
|
|
14
|
t14_m
|
-9.0631295
|
2590.921
|
0.0000000
|
Sector: For-profit
|
27082
|
men
|
1
|
t(2590.92) = -9.06, p < .001
|
|
15
|
t15_m
|
6.3057470
|
2613.798
|
0.0000000
|
Sector: Government
|
27082
|
men
|
1
|
t(2613.8) = 6.31, p < .001
|
|
16
|
t16_m
|
5.5648839
|
2780.548
|
0.0000000
|
Sector:Non-profit
|
27082
|
men
|
1
|
t(2780.55) = 5.56, p < .001
|
|
17
|
t17_m
|
10.4789813
|
3288.980
|
0.0000000
|
Other job
|
27082
|
men
|
1
|
t(3288.98) = 10.48, p < .001
|
|
18
|
t18_m
|
3.2627717
|
3242.145
|
0.0011147
|
Break in Dutch employment
|
27082
|
men
|
0
|
t(3242.15) = 3.26, p = 0
|
|
19
|
t19_m
|
-14.4831005
|
2231.901
|
0.0000000
|
Period abroad
|
27082
|
men
|
1
|
t(2231.9) = -14.48, p < .001
|
|
20
|
t20_m
|
-6.0214089
|
2629.183
|
0.0000000
|
Partnered
|
27082
|
men
|
1
|
t(2629.18) = -6.02, p < .001
|
|
21
|
t1_v
|
0.1466530
|
1519.638
|
0.8834254
|
Real monthly pay
|
25161
|
women
|
0
|
t(1519.64) = 0.15, p = 0.88
|
|
22
|
t2_v
|
-4.3127046
|
1473.152
|
0.0000172
|
Log real monthly pay
|
25161
|
women
|
1
|
t(1473.15) = -4.31, p < .001
|
|
23
|
t3_v
|
-21.8505770
|
1424.003
|
0.0000000
|
Time
|
25161
|
women
|
1
|
t(1424) = -21.85, p < .001
|
|
24
|
t4_v
|
15.2758610
|
1440.897
|
0.0000000
|
PhD discipline: Health sciences
|
25161
|
women
|
1
|
t(1440.9) = 15.28, p < .001
|
|
25
|
t5_v
|
11.4640354
|
1455.524
|
0.0000000
|
PhD discipline: Social sciences
|
25161
|
women
|
1
|
t(1455.52) = 11.46, p < .001
|
|
26
|
t6_v
|
-25.5158262
|
1327.420
|
0.0000000
|
PhD discipline: Natural sciences & mathematics
|
25161
|
women
|
1
|
t(1327.42) = -25.52, p < .001
|
|
27
|
t7_v
|
0.0391361
|
1393.127
|
0.9687875
|
PhD discipline: Engineering
|
25161
|
women
|
0
|
t(1393.13) = 0.04, p = 0.97
|
|
28
|
t8_v
|
13.7180715
|
1973.628
|
0.0000000
|
PhD discipline: Humanities
|
25161
|
women
|
1
|
t(1973.63) = 13.72, p < .001
|
|
29
|
t9_v
|
-3.4631195
|
1384.384
|
0.0005502
|
Child under 5
|
25161
|
women
|
1
|
t(1384.38) = -3.46, p < .001
|
|
30
|
t10_v
|
-6.4933021
|
1447.355
|
0.0000000
|
Log Monthly contract hours
|
25161
|
women
|
1
|
t(1447.36) = -6.49, p < .001
|
|
31
|
t11_v
|
1.4033976
|
1395.222
|
0.1607208
|
Temporary contract
|
25161
|
women
|
0
|
t(1395.22) = 1.4, p = 0.16
|
|
32
|
t12_v
|
13.9885935
|
1422.669
|
0.0000000
|
PhD cohort
|
25161
|
women
|
1
|
t(1422.67) = 13.99, p < .001
|
|
33
|
t13_v
|
-7.1017921
|
1418.733
|
0.0000000
|
PhD satisfaction
|
25161
|
women
|
1
|
t(1418.73) = -7.1, p < .001
|
|
34
|
t14_v
|
-7.3892582
|
1384.287
|
0.0000000
|
Sector: For-profit
|
25161
|
women
|
1
|
t(1384.29) = -7.39, p < .001
|
|
35
|
t15_v
|
4.3390855
|
1395.738
|
0.0000153
|
Sector: Government
|
25161
|
women
|
1
|
t(1395.74) = 4.34, p < .001
|
|
36
|
t16_v
|
5.2575672
|
1449.498
|
0.0000002
|
Sector:Non-profit
|
25161
|
women
|
1
|
t(1449.5) = 5.26, p < .001
|
|
37
|
t17_v
|
11.3991057
|
1751.582
|
0.0000000
|
Other job
|
25161
|
women
|
1
|
t(1751.58) = 11.4, p < .001
|
|
38
|
t18_v
|
0.2554370
|
1419.811
|
0.7984226
|
Break in Dutch employment
|
25161
|
women
|
0
|
t(1419.81) = 0.26, p = 0.8
|
|
39
|
t19_v
|
-11.0304233
|
1269.902
|
0.0000000
|
Period abroad
|
25161
|
women
|
1
|
t(1269.9) = -11.03, p < .001
|
|
40
|
t20_v
|
-0.6404989
|
1392.486
|
0.5219537
|
Partnered
|
25161
|
women
|
0
|
t(1392.49) = -0.64, p = 0.52
|
