
library(tidyverse)
library(data.table)
library(ggplot2)
library(tictoc)
library(lubridate)

# Load the capex regression data.
# Remember this is investment in buildings and structures.
capex_data <- fread("P:/Company Tax/Data/EUM/capex_eum_reg_data.csv")

# Creating Treated Variables: label each row with a treatment group based on
# its total_comp_income band. fcase() returns the value of the first condition
# that is TRUE, so each band only needs its upper bound. Rows with a missing
# total_comp_income match no condition and are left as NA.
# `:=` adds the column by reference, so no reassignment is needed.
capex_data[, treated_groups := fcase(
  total_comp_income < 2e6, "treated_15",
  total_comp_income < 10e6, "treated_16",
  total_comp_income < 25e6, "treated_17",
  total_comp_income < 50e6, "treated_18",
  # >= (not >) so an income of exactly 50 million is classified, not left NA
  total_comp_income >= 50e6, "not treated"
)]

# Correcting for base rate entity: if they are not a base rate entity, they
# don't get the lower tax rate, so from tsid 18 onwards they are "not treated".
# NOTE(review): fifelse() returns NA where the test is NA (e.g. a missing
# base_rate_entity with tsid >= 18) - confirm that is the intended outcome.
capex_data[, treated_groups := fifelse(
  base_rate_entity == 0 & tsid >= 18, "not treated", treated_groups
)]

# Average capex per treatment group and tsid, plus two concentration ratios:
# the largest contributor, and the two largest contributors, as a share of the
# group total.
# NOTE(review): this table keeps groups with num_companies > 10, while
# capex_DD_stats keeps num_companies >= 10 - confirm which is intended.
average_capex_data <- capex_data %>%
  group_by(treated_groups, tsid) %>%
  # Trim to the 1st-99th percentile within each group. na.rm = TRUE stops
  # quantile() from erroring when capex has missing values; rows with missing
  # capex are dropped here because filter() discards rows whose test is NA.
  filter(between(
    capex,
    quantile(capex, 0.01, na.rm = TRUE),
    quantile(capex, 0.99, na.rm = TRUE)
  )) %>%
  summarise(
    average_investment = mean(capex, na.rm = TRUE),
    num_companies = n(),
    one_50_check = max(capex) / sum(capex, na.rm = TRUE),
    # Sum the two largest observations. Taking the second largest *unique*
    # value would skip a tied top contributor and understate the share.
    # head() also makes a single-company group report a share of 1.
    two_67_check = sum(head(sort(capex, decreasing = TRUE), 2)) /
      sum(capex, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  filter(tsid > 10 & num_companies > 10)

write_csv(average_capex_data, "average_capex_data.csv")


# Also average the liquidity and foreign ownership flags per tsid and
# treatment group, keeping only groups with at least 10 rows.
capex_DD_stats <- capex_data %>%
  group_by(tsid, treated_groups) %>%
  summarise(
    percent_liquid = mean(lqdty_flag, na.rm = TRUE),
    percent_sig_fo = mean(foreign_flag, na.rm = TRUE),
    num_companies = n()
  ) %>%
  # summarise() only drops the last grouping level; ungroup so the result is
  # a plain table, as in the average_* pipelines.
  ungroup() %>%
  filter(num_companies >= 10)

write_csv(capex_DD_stats, "capex_DD_stats.csv")

###### BAS DATA

# Load the BAS regression data.
bas_data <- fread("P:/Company Tax/Data/EUM/bas_eum_reg_data.csv")

# Creating Treated Variables: label each row with a treatment group based on
# its total_comp_income band. fcase() returns the value of the first condition
# that is TRUE, so each band only needs its upper bound. Rows with a missing
# total_comp_income match no condition and are left as NA.
# `:=` adds the column by reference, so no reassignment is needed.
bas_data[, treated_groups := fcase(
  total_comp_income < 2e6, "treated_15",
  total_comp_income < 10e6, "treated_16",
  total_comp_income < 25e6, "treated_17",
  total_comp_income < 50e6, "treated_18",
  # >= (not >) so an income of exactly 50 million is classified, not left NA
  total_comp_income >= 50e6, "not treated"
)]

# Correcting for base rate entity: if they are not a base rate entity, they
# don't get the lower tax rate, so from tsid 18 onwards they are "not treated".
# NOTE(review): fifelse() returns NA where the test is NA (e.g. a missing
# base_rate_entity with tsid >= 18) - confirm that is the intended outcome.
bas_data[, treated_groups := fifelse(
  base_rate_entity == 0 & tsid >= 18, "not treated", treated_groups
)]

# Average capex per treatment group and tsid in the BAS data, plus two
# concentration ratios: the largest contributor, and the two largest
# contributors, as a share of the group total.
# NOTE(review): unlike average_capex_data, no minimum num_companies filter is
# applied here - confirm that is intended.
average_bas_data <- bas_data %>%
  group_by(treated_groups, tsid) %>%
  # Trim to the 1st-99th percentile within each group. na.rm = TRUE stops
  # quantile() from erroring when capex has missing values; rows with missing
  # capex are dropped here because filter() discards rows whose test is NA.
  filter(between(
    capex,
    quantile(capex, 0.01, na.rm = TRUE),
    quantile(capex, 0.99, na.rm = TRUE)
  )) %>%
  summarise(
    average_investment = mean(capex, na.rm = TRUE),
    num_companies = n(),
    one_50_check = max(capex) / sum(capex, na.rm = TRUE),
    # Sum the two largest observations. Taking the second largest *unique*
    # value would skip a tied top contributor and understate the share.
    # head() also makes a single-company group report a share of 1.
    two_67_check = sum(head(sort(capex, decreasing = TRUE), 2)) /
      sum(capex, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  filter(tsid < 22)

write_csv(average_bas_data, "average_bas_data.csv")

# Average the liquidity and foreign ownership flags per tsid and treatment
# group in the BAS data.
# NOTE(review): capex_DD_stats drops groups with fewer than 10 rows; no such
# filter is applied here - confirm that is intended.
bas_DD_stats <- bas_data %>%
  group_by(tsid, treated_groups) %>%
  summarise(
    percent_liquid = mean(lqdty_flag, na.rm = TRUE),
    percent_sig_fo = mean(foreign_flag, na.rm = TRUE),
    num_companies = n()
  ) %>%
  # summarise() only drops the last grouping level; ungroup so the result is
  # a plain table, as in the average_* pipelines.
  ungroup()

write_csv(bas_DD_stats, "bas_DD_stats.csv")






