library(tidyverse)
library(data.table)
library(ggplot2)
library(tictoc)
library(fixest)
library(arrow)
library(broom)


# Load the BAS regression panel and keep only tsid 12 through 19 (inclusive).
# Rows whose tsid is missing are dropped by the same filter.
data <- fread(
  "P:/Company Tax/Data/EUM/bas_eum_reg_data.csv"
)[tsid >= 12 & tsid <= 19]

# Creating Treated Variables
#
# One table per tax-cut cohort (DnD15, DnD16, DnD17, DnD18). Each table is
# created by row-filtering `data` FIRST: a row subset of a data.table is a new
# object, so the `:=` calls that follow modify only that cohort and never
# `data` or another cohort. (`DnDxx <- data[, col := ...]` would instead bind
# the cohort name to `data` itself, because `:=` updates by reference and
# returns the very same object.)
#
# The row filters do not depend on `treated` or `policy_period`, so filtering
# before creating those columns keeps exactly the same rows as filtering after.
# Every cohort keeps companies only (company_flag == 1).
#
# tsid is the last 2 digits of a financial year.

# 2015 cohort: lower tax rate if turnover < $2m.
# Data from tsid 18 onwards is unreliable for companies with turnover less than $10M.
DnD15 <- data[tsid < 18 & company_flag == 1]
DnD15[, treated := ifelse(total_comp_income < 2 * 10^6, 1, 0)]
# If they are not a base rate entity, they don't get the lower tax rate.
# (Only bites for tsid >= 18, so it changes nothing in this cohort; kept so all
# four cohorts are built the same way.)
DnD15[, treated := ifelse(base_rate_entity == 0 & tsid >= 18, 0, treated)]
DnD15[, policy_period := ifelse(tsid >= 16, 1, 0)]

# 2016 cohort: $2m <= turnover < $10m (excluding those that were previously treated).
# Data from tsid 18 onwards is unreliable for companies with turnover less than $10M.
DnD16 <- data[tsid < 18 & company_flag == 1]
DnD16[, treated := ifelse(total_comp_income < 10 * 10^6 & total_comp_income >= 2 * 10^6, 1, 0)]
# If they are not a base rate entity, they don't get the lower tax rate (no effect for tsid < 18).
DnD16[, treated := ifelse(base_rate_entity == 0 & tsid >= 18, 0, treated)]
DnD16[, policy_period := ifelse(tsid >= 17, 1, 0)]

# 2017 cohort: $10m <= turnover < $25m (excluding those that were previously treated).
# Sample restricted to rows with turnover of at least $10m.
DnD17 <- data[turnover >= 10 * 10^6 & company_flag == 1]
DnD17[, treated := ifelse(total_comp_income < 25 * 10^6 & total_comp_income >= 10 * 10^6, 1, 0)]
# If they are not a base rate entity, they don't get the lower tax rate
DnD17[, treated := ifelse(base_rate_entity == 0 & tsid >= 18, 0, treated)]
DnD17[, policy_period := ifelse(tsid >= 18, 1, 0)]

# 2018 cohort: $25m <= turnover < $50m (excluding those that were previously treated).
# Sample restricted to rows with turnover of at least $10m.
DnD18 <- data[turnover >= 10 * 10^6 & company_flag == 1]
DnD18[, treated := ifelse(total_comp_income < 50 * 10^6 & total_comp_income >= 25 * 10^6, 1, 0)]
# If they are not a base rate entity, they don't get the lower tax rate
DnD18[, treated := ifelse(base_rate_entity == 0 & tsid >= 18, 0, treated)]
DnD18[, policy_period := ifelse(tsid >= 19, 1, 0)]

# Extensive Margin

# Add the extensive-margin outcome to a cohort table and return the table.
#
# For every treated x industry_simple x tsid cell this adds:
#   firms_size_ind      - number of rows with non-missing capex
#   invs_size_ind       - sum of invest_dummy
#   invs_size_ind_share - invs_size_ind / firms_size_ind (share of firms investing)
#   log_odds            - log(share / (1 - share)), capped at +cap / -cap
# A share of exactly 1 maps to +cap and a share of exactly 0 to -cap. When the
# share is missing or falls outside [0, 1], no fcase() branch matches and
# log_odds is NA.
#
# dt:  cohort data.table with columns capex, invest_dummy, treated,
#      industry_simple and tsid.
# cap: absolute cap applied to the log odds (default 4).
#
# The result must be assigned back (`DnD15 <- add_capped_log_odds(DnD15)`);
# do not rely on the caller's table having been updated in place.
add_capped_log_odds <- function(dt, cap = 4) {
  # fcase() requires every branch value to have the same type as the raw logit
  logit_cap <- as.numeric(cap)

  dt[, c("firms_size_ind", "invs_size_ind") := .(
    sum(!is.na(capex)),
    sum(invest_dummy)
  ), by = .(treated, industry_simple, tsid)]

  # Compute share of firms investing
  dt[, invs_size_ind_share := invs_size_ind / firms_size_ind]

  dt[, log_odds := {
    # Evaluate the logit once and reuse it in all three branches
    raw_log_odds <- log(invs_size_ind_share / (1 - invs_size_ind_share))
    fcase(
      invs_size_ind_share == 1 | raw_log_odds > logit_cap, logit_cap,
      invs_size_ind_share == 0 | raw_log_odds < -logit_cap, -logit_cap,
      invs_size_ind_share > 0 & invs_size_ind_share < 1, raw_log_odds
    )
  }]

  dt
}

# Each model below regresses log_odds on i(period, treated, <cohort year>)
# (period interacted with treated, cohort year as the reference period) plus
# the controls, with industry_simple^tsid_factor, tsid_factor and bg_id fixed
# effects, clustering standard errors on industry_simple.

# Estimate the effect of the 2015 Tax Cut on the extensive margin
DnD15 <- add_capped_log_odds(DnD15)
model_DnD15_Ext <- feols(
  log_odds ~ i(period, treated, 15) + log_lqdty + age + fte_log + log_tot_inc + foreign_flag |
    # FIXED EFFECTS
    industry_simple^tsid_factor + tsid_factor +
    bg_id,
  cluster = ~ industry_simple,
  data = DnD15
)

# Estimate the effect of the 2016 Tax Cut on the extensive margin
DnD16 <- add_capped_log_odds(DnD16)
model_DnD16_Ext <- feols(
  log_odds ~ i(period, treated, 16) + log_lqdty + age + fte_log + log_tot_inc + foreign_flag |
    # FIXED EFFECTS
    industry_simple^tsid_factor + tsid_factor +
    bg_id,
  cluster = ~ industry_simple,
  data = DnD16
)

# Estimate the effect of the 2017 Tax Cut on the extensive margin
DnD17 <- add_capped_log_odds(DnD17)
model_DnD17_Ext <- feols(
  log_odds ~ i(period, treated, 17) + log_lqdty + age + fte_log + log_tot_inc + foreign_flag |
    # FIXED EFFECTS
    industry_simple^tsid_factor + tsid_factor +
    bg_id,
  cluster = ~ industry_simple,
  data = DnD17
)

# Estimate the effect of the 2018 Tax Cut on the extensive margin
DnD18 <- add_capped_log_odds(DnD18)
model_DnD18_Ext <- feols(
  log_odds ~ i(period, treated, 18) + log_lqdty + age + fte_log + log_tot_inc + foreign_flag |
    # FIXED EFFECTS
    industry_simple^tsid_factor + tsid_factor +
    bg_id,
  cluster = ~ industry_simple,
  data = DnD18
)

# Keep only the coefficient output of each BAS extensive-margin model (not the
# fixed effects) and write it to the clearance folder, one cohort at a time.

# 2015
tidy_model_DnD15_Ext <- broom::tidy(model_DnD15_Ext)
qs::qsave(
  tidy_model_DnD15_Ext,
  "P:/Company Tax/Econometrics for Clearance/Models to Clear/Dynamic With Controls/BAS_15_Ext_dynamic.qs"
)

# 2016
tidy_model_DnD16_Ext <- broom::tidy(model_DnD16_Ext)
qs::qsave(
  tidy_model_DnD16_Ext,
  "P:/Company Tax/Econometrics for Clearance/Models to Clear/Dynamic With Controls/BAS_16_Ext_dynamic.qs"
)

# 2017
tidy_model_DnD17_Ext <- broom::tidy(model_DnD17_Ext)
qs::qsave(
  tidy_model_DnD17_Ext,
  "P:/Company Tax/Econometrics for Clearance/Models to Clear/Dynamic With Controls/BAS_17_Ext_dynamic.qs"
)

# 2018
tidy_model_DnD18_Ext <- broom::tidy(model_DnD18_Ext)
qs::qsave(
  tidy_model_DnD18_Ext,
  "P:/Company Tax/Econometrics for Clearance/Models to Clear/Dynamic With Controls/BAS_18_Ext_dynamic.qs"
)

# Observation count of every extensive-margin model, looked up by object name
model_names <- c(
  "model_DnD15_Ext",
  "model_DnD16_Ext",
  "model_DnD17_Ext",
  "model_DnD18_Ext"
)
bas_num_obs_list_extensive <- data.frame(
  Model = model_names,
  Number_of_Observations = unlist(lapply(mget(model_names), nobs))
)

# Intensive Regressions
#
# For each cohort: drop rows with CAPEX = 0 (rows where capex is missing are
# removed by the same filter), then regress capex_log on
# i(period, treated, <cohort year>) and the controls with
# industry_simple^tsid_factor, tsid_factor and bg_id fixed effects.
# Standard errors are clustered on bg_id.

# 2015 Tax Cut, intensive margin
DnD15 <- DnD15[capex != 0]
model_DnD15_Int <- feols(
  capex_log ~ i(period, treated, 15) + log_lqdty + age + fte_log + log_tot_inc + foreign_flag |
    # FIXED EFFECTS
    industry_simple^tsid_factor + tsid_factor + bg_id,
  data = DnD15,
  cluster = ~ bg_id
)

# 2016 Tax Cut, intensive margin
DnD16 <- DnD16[capex != 0]
model_DnD16_Int <- feols(
  capex_log ~ i(period, treated, 16) + log_lqdty + age + fte_log + log_tot_inc + foreign_flag |
    # FIXED EFFECTS
    industry_simple^tsid_factor + tsid_factor + bg_id,
  data = DnD16,
  cluster = ~ bg_id
)

# 2017 Tax Cut, intensive margin
DnD17 <- DnD17[capex != 0]
model_DnD17_Int <- feols(
  capex_log ~ i(period, treated, 17) + log_lqdty + age + fte_log + log_tot_inc + foreign_flag |
    # FIXED EFFECTS
    industry_simple^tsid_factor + tsid_factor + bg_id,
  data = DnD17,
  # weights = ~weight,
  cluster = ~ bg_id
)

# 2018 Tax Cut, intensive margin
DnD18 <- DnD18[capex != 0]
model_DnD18_Int <- feols(
  capex_log ~ i(period, treated, 18) + log_lqdty + age + fte_log + log_tot_inc + foreign_flag |
    # FIXED EFFECTS
    industry_simple^tsid_factor + tsid_factor + bg_id,
  data = DnD18,
  # weights = ~weight,
  cluster = ~ bg_id
)

# Keep only the coefficient output of each BAS intensive-margin model (not the
# fixed effects) and write it to the clearance folder, one cohort at a time.

# 2015
tidy_model_DnD15_Int <- broom::tidy(model_DnD15_Int)
qs::qsave(
  tidy_model_DnD15_Int,
  "P:/Company Tax/Econometrics for Clearance/Models to Clear/Dynamic With Controls/BAS_15_Int_dynamic.qs"
)

# 2016
tidy_model_DnD16_Int <- broom::tidy(model_DnD16_Int)
qs::qsave(
  tidy_model_DnD16_Int,
  "P:/Company Tax/Econometrics for Clearance/Models to Clear/Dynamic With Controls/BAS_16_Int_dynamic.qs"
)

# 2017
tidy_model_DnD17_Int <- broom::tidy(model_DnD17_Int)
qs::qsave(
  tidy_model_DnD17_Int,
  "P:/Company Tax/Econometrics for Clearance/Models to Clear/Dynamic With Controls/BAS_17_Int_dynamic.qs"
)

# 2018
tidy_model_DnD18_Int <- broom::tidy(model_DnD18_Int)
qs::qsave(
  tidy_model_DnD18_Int,
  "P:/Company Tax/Econometrics for Clearance/Models to Clear/Dynamic With Controls/BAS_18_Int_dynamic.qs"
)

# Observation count of every intensive-margin model, looked up by object name
model_names <- c(
  "model_DnD15_Int",
  "model_DnD16_Int",
  "model_DnD17_Int",
  "model_DnD18_Int"
)
bas_num_obs_list_intensive <- data.frame(
  Model = model_names,
  Number_of_Observations = unlist(lapply(mget(model_names), nobs))
)

# CAPEX SECTION
# Replace `data` with the CAPEX regression panel, keeping only tsid 12 through
# 19 (inclusive). Rows whose tsid is missing are dropped by the same filter.
data <- fread(
  "P:/Company Tax/Data/EUM/capex_eum_reg_data.csv"
)[tsid >= 12 & tsid <= 19]

# Creating Treated Variables
#
# One table per tax-cut cohort. Every cohort starts from its own copy(data):
# data.table's `:=` updates by reference and returns the same object, so
# `DnDxx <- data[, treated := ...]` would make DnD15, DnD16, DnD17 and DnD18
# four names for `data` itself, and each later `:=` would overwrite `treated`
# and `policy_period` for all of them (leaving every cohort with the 2018
# definition). copy() gives each cohort independent columns and leaves `data`
# untouched.
#
# tsid is the last 2 digits of a financial year.

# 2015 cohort: lower tax rate if turnover < $2m
DnD15 <- copy(data)
DnD15[, treated := ifelse(total_comp_income < 2 * 10^6, 1, 0)]
# If they are not a base rate entity, they don't get the lower tax rate
DnD15[, treated := ifelse(base_rate_entity == 0 & tsid >= 18, 0, treated)]
DnD15[, policy_period := ifelse(tsid >= 16, 1, 0)]

# 2016 cohort: $2m <= turnover < $10m (excluding those that were previously treated)
DnD16 <- copy(data)
DnD16[, treated := ifelse(total_comp_income < 10 * 10^6 & total_comp_income >= 2 * 10^6, 1, 0)]
# If they are not a base rate entity, they don't get the lower tax rate
DnD16[, treated := ifelse(base_rate_entity == 0 & tsid >= 18, 0, treated)]
DnD16[, policy_period := ifelse(tsid >= 17, 1, 0)]

# 2017 cohort: $10m <= turnover < $25m (excluding those that were previously treated)
DnD17 <- copy(data)
DnD17[, treated := ifelse(total_comp_income < 25 * 10^6 & total_comp_income >= 10 * 10^6, 1, 0)]
# If they are not a base rate entity, they don't get the lower tax rate
DnD17[, treated := ifelse(base_rate_entity == 0 & tsid >= 18, 0, treated)]
DnD17[, policy_period := ifelse(tsid >= 18, 1, 0)]

# 2018 cohort: $25m <= turnover < $50m (excluding those that were previously treated)
DnD18 <- copy(data)
DnD18[, treated := ifelse(total_comp_income < 50 * 10^6 & total_comp_income >= 25 * 10^6, 1, 0)]
# If they are not a base rate entity, they don't get the lower tax rate
DnD18[, treated := ifelse(base_rate_entity == 0 & tsid >= 18, 0, treated)]
DnD18[, policy_period := ifelse(tsid >= 19, 1, 0)]

# Extensive Margin

# Add the extensive-margin outcome to a cohort table and return the table.
# (Defined here so the CAPEX section is self-contained.)
#
# For every treated x industry_simple x tsid cell this adds:
#   firms_size_ind      - number of rows with non-missing capex
#   invs_size_ind       - sum of invest_dummy
#   invs_size_ind_share - invs_size_ind / firms_size_ind (share of firms investing)
#   log_odds            - log(share / (1 - share)), capped at +cap / -cap
# A share of exactly 1 maps to +cap and a share of exactly 0 to -cap. When the
# share is missing or falls outside [0, 1], no fcase() branch matches and
# log_odds is NA.
#
# dt:  cohort data.table with columns capex, invest_dummy, treated,
#      industry_simple and tsid.
# cap: absolute cap applied to the log odds (default 4).
#
# The result must be assigned back (`DnD15 <- add_capped_log_odds(DnD15)`);
# do not rely on the caller's table having been updated in place.
add_capped_log_odds <- function(dt, cap = 4) {
  # fcase() requires every branch value to have the same type as the raw logit
  logit_cap <- as.numeric(cap)

  dt[, c("firms_size_ind", "invs_size_ind") := .(
    sum(!is.na(capex)),
    sum(invest_dummy)
  ), by = .(treated, industry_simple, tsid)]

  # Compute share of firms investing
  dt[, invs_size_ind_share := invs_size_ind / firms_size_ind]

  dt[, log_odds := {
    # Evaluate the logit once and reuse it in all three branches
    raw_log_odds <- log(invs_size_ind_share / (1 - invs_size_ind_share))
    fcase(
      invs_size_ind_share == 1 | raw_log_odds > logit_cap, logit_cap,
      invs_size_ind_share == 0 | raw_log_odds < -logit_cap, -logit_cap,
      invs_size_ind_share > 0 & invs_size_ind_share < 1, raw_log_odds
    )
  }]

  dt
}

# Each model below regresses log_odds on i(period, treated, <cohort year>)
# (period interacted with treated, cohort year as the reference period) plus
# the controls, with industry_simple^tsid_factor, tsid_factor and bg_id fixed
# effects, clustering standard errors on industry_simple.

# Estimate the effect of the 2015 Tax Cut on the extensive margin
DnD15 <- add_capped_log_odds(DnD15)
model_DnD15_Ext <- feols(
  log_odds ~ i(period, treated, 15) + log_lqdty + age + fte_log + log_tot_inc + foreign_flag |
    # FIXED EFFECTS
    industry_simple^tsid_factor + tsid_factor +
    bg_id,
  cluster = ~ industry_simple,
  data = DnD15
)

# Estimate the effect of the 2016 Tax Cut on the extensive margin
DnD16 <- add_capped_log_odds(DnD16)
model_DnD16_Ext <- feols(
  log_odds ~ i(period, treated, 16) + log_lqdty + age + fte_log + log_tot_inc + foreign_flag |
    # FIXED EFFECTS
    industry_simple^tsid_factor + tsid_factor +
    bg_id,
  cluster = ~ industry_simple,
  data = DnD16
)

# Estimate the effect of the 2017 Tax Cut on the extensive margin
DnD17 <- add_capped_log_odds(DnD17)
model_DnD17_Ext <- feols(
  log_odds ~ i(period, treated, 17) + log_lqdty + age + fte_log + log_tot_inc + foreign_flag |
    # FIXED EFFECTS
    industry_simple^tsid_factor + tsid_factor +
    bg_id,
  cluster = ~ industry_simple,
  data = DnD17
)

# Estimate the effect of the 2018 Tax Cut on the extensive margin
DnD18 <- add_capped_log_odds(DnD18)
model_DnD18_Ext <- feols(
  log_odds ~ i(period, treated, 18) + log_lqdty + age + fte_log + log_tot_inc + foreign_flag |
    # FIXED EFFECTS
    industry_simple^tsid_factor + tsid_factor +
    bg_id,
  cluster = ~ industry_simple,
  data = DnD18
)

# Keep only the coefficient output of each CAPEX extensive-margin model (not
# the fixed effects) and write it to the clearance folder, one cohort at a time.

# 2015
tidy_model_DnD15_Ext <- broom::tidy(model_DnD15_Ext)
qs::qsave(
  tidy_model_DnD15_Ext,
  "P:/Company Tax/Econometrics for Clearance/Models to Clear/Dynamic With Controls/CAPEX_15_Ext_dynamic.qs"
)

# 2016
tidy_model_DnD16_Ext <- broom::tidy(model_DnD16_Ext)
qs::qsave(
  tidy_model_DnD16_Ext,
  "P:/Company Tax/Econometrics for Clearance/Models to Clear/Dynamic With Controls/CAPEX_16_Ext_dynamic.qs"
)

# 2017
tidy_model_DnD17_Ext <- broom::tidy(model_DnD17_Ext)
qs::qsave(
  tidy_model_DnD17_Ext,
  "P:/Company Tax/Econometrics for Clearance/Models to Clear/Dynamic With Controls/CAPEX_17_Ext_dynamic.qs"
)

# 2018
tidy_model_DnD18_Ext <- broom::tidy(model_DnD18_Ext)
qs::qsave(
  tidy_model_DnD18_Ext,
  "P:/Company Tax/Econometrics for Clearance/Models to Clear/Dynamic With Controls/CAPEX_18_Ext_dynamic.qs"
)

# Observation count of every extensive-margin model, looked up by object name
model_names <- c(
  "model_DnD15_Ext",
  "model_DnD16_Ext",
  "model_DnD17_Ext",
  "model_DnD18_Ext"
)
capex_num_obs_list_extensive <- data.frame(
  Model = model_names,
  Number_of_Observations = unlist(lapply(mget(model_names), nobs))
)


# Intensive Regressions
#
# For each cohort: drop rows with CAPEX = 0 (rows where capex is missing are
# removed by the same filter), then regress capex_log on
# i(period, treated, <cohort year>) and the controls with
# industry_simple^tsid_factor, tsid_factor and bg_id fixed effects.
# Standard errors are clustered on bg_id.

# 2015 Tax Cut, intensive margin
DnD15 <- DnD15[capex != 0]
model_DnD15_Int <- feols(
  capex_log ~ i(period, treated, 15) + log_lqdty + age + fte_log + log_tot_inc + foreign_flag |
    # FIXED EFFECTS
    industry_simple^tsid_factor + tsid_factor + bg_id,
  data = DnD15,
  cluster = ~ bg_id
)

# 2016 Tax Cut, intensive margin
DnD16 <- DnD16[capex != 0]
model_DnD16_Int <- feols(
  capex_log ~ i(period, treated, 16) + log_lqdty + age + fte_log + log_tot_inc + foreign_flag |
    # FIXED EFFECTS
    industry_simple^tsid_factor + tsid_factor + bg_id,
  data = DnD16,
  cluster = ~ bg_id
)

# 2017 Tax Cut, intensive margin
DnD17 <- DnD17[capex != 0]
model_DnD17_Int <- feols(
  capex_log ~ i(period, treated, 17) + log_lqdty + age + fte_log + log_tot_inc + foreign_flag |
    # FIXED EFFECTS
    industry_simple^tsid_factor + tsid_factor + bg_id,
  data = DnD17,
  cluster = ~ bg_id
)

# 2018 Tax Cut, intensive margin
DnD18 <- DnD18[capex != 0]
model_DnD18_Int <- feols(
  capex_log ~ i(period, treated, 18) + log_lqdty + age + fte_log + log_tot_inc + foreign_flag |
    # FIXED EFFECTS
    industry_simple^tsid_factor + tsid_factor + bg_id,
  data = DnD18,
  cluster = ~ bg_id
)

# Keep only the coefficient output of each CAPEX intensive-margin model (not
# the fixed effects) and write it to the clearance folder, one cohort at a time.

# 2015
tidy_model_DnD15_Int <- broom::tidy(model_DnD15_Int)
qs::qsave(
  tidy_model_DnD15_Int,
  "P:/Company Tax/Econometrics for Clearance/Models to Clear/Dynamic With Controls/CAPEX_15_Int_dynamic.qs"
)

# 2016
tidy_model_DnD16_Int <- broom::tidy(model_DnD16_Int)
qs::qsave(
  tidy_model_DnD16_Int,
  "P:/Company Tax/Econometrics for Clearance/Models to Clear/Dynamic With Controls/CAPEX_16_Int_dynamic.qs"
)

# 2017
tidy_model_DnD17_Int <- broom::tidy(model_DnD17_Int)
qs::qsave(
  tidy_model_DnD17_Int,
  "P:/Company Tax/Econometrics for Clearance/Models to Clear/Dynamic With Controls/CAPEX_17_Int_dynamic.qs"
)

# 2018
tidy_model_DnD18_Int <- broom::tidy(model_DnD18_Int)
qs::qsave(
  tidy_model_DnD18_Int,
  "P:/Company Tax/Econometrics for Clearance/Models to Clear/Dynamic With Controls/CAPEX_18_Int_dynamic.qs"
)

# Observation count of every intensive-margin model, looked up by object name
model_names <- c(
  "model_DnD15_Int",
  "model_DnD16_Int",
  "model_DnD17_Int",
  "model_DnD18_Int"
)
capex_num_obs_list_intensive <- data.frame(
  Model = model_names,
  Number_of_Observations = unlist(lapply(mget(model_names), nobs))
)

# Stack the four observation-count tables into one, prefixing every model name
# with the data source it was estimated on ("bas_" or "capex_"), then write
# the combined table to the clearance folder.
dynamic_w_ctrls_num_obs_list <- bind_rows(
  mutate(bas_num_obs_list_intensive, Model = paste0("bas_", Model)),
  mutate(bas_num_obs_list_extensive, Model = paste0("bas_", Model)),
  mutate(capex_num_obs_list_intensive, Model = paste0("capex_", Model)),
  mutate(capex_num_obs_list_extensive, Model = paste0("capex_", Model))
)

write_csv(
  dynamic_w_ctrls_num_obs_list,
  "P:/Company Tax/Econometrics for Clearance/Models to Clear/Number of Observations in each Model/dynamic_w_ctrls_num_obs_list.csv"
)























