

library(tidyverse)
library(data.table)
library(tictoc)
library(zoo)
library(arrow)

tic()

# Helpers ----------------------------------------------------------------------

# Read only `columns` from the parquet file at `path` and return them as a
# data.table, with the columns in the order given.
#
# read_parquet() returns a tibble by default, so the result is converted with
# setDT() (by reference, no copy) to make data.table syntax safe downstream.
# Selecting columns at read time avoids loading the unused columns at all.
# all_of() makes a missing column an error rather than a silent omission.
read_eum_columns <- function(path, columns) {
  dt <- read_parquet(path, col_select = all_of(columns))
  setDT(dt)
  setcolorder(dt, columns)
  dt
}

# BAS -------------------------------------------------------------------------
# BAS company data
bas_company_data <- read_eum_columns(
  "P:/Company Tax/Data/EUM/bas_eum_company_data.parquet",
  c("id", "tsid", "turnover", "capex")
)

# Company BIT data
bit_comp_data <- read_eum_columns(
  "P:/Company Tax/Data/EUM/bit_comp_eum_data.parquet",
  c("id", "tsid", "c_currasst", "c_currliab", "c_totlinc")
)

# Join BIT and BAS data, keeping (id, tsid) pairs present in either source
bit_bas_full_join <- full_join(
  bas_company_data,
  bit_comp_data,
  by = c("id", "tsid")
)

# Payg -------------------------------------------------------------------------
# Import payg data
payg_data <- read_eum_columns(
  "P:/Company Tax/Data/EUM/payg_eum_data.parquet",
  c("id", "tsid", "fte")
)

# Join payg onto the combined BAS/BIT data, again keeping unmatched rows
bit_bas_payg_fj <- full_join(
  bit_bas_full_join,
  payg_data,
  by = c("id", "tsid")
)


# Drop the intermediate tables to free memory. Only the objects this script
# created are removed, so anything else already in the environment is left
# untouched (unlike rm(list = setdiff(ls(), ...)), which wipes everything).
rm(bas_company_data, bit_comp_data, bit_bas_full_join, payg_data)

# The filter below uses data.table syntax (bare column names in `i`), so make
# sure the dplyr join result really is a data.table. setDT() works by
# reference and is harmless if it already is one.
setDT(bit_bas_payg_fj)

# Keep only rows where capex, fte and c_totlinc are all non-missing
bit_bas_payg_fj <- bit_bas_payg_fj[
  !is.na(capex) & !is.na(fte) & !is.na(c_totlinc)
]

# CSI industry -------------------------------------------------------------------------
# Merge in the industry data on (id, tsid)
industry_data <- read_parquet("P:/Company Tax/Data/EUM/CSI_Industry_EUM_data.parquet")

# NOTE(review): this is a full join, so (id, tsid) pairs that appear only in
# industry_data are added back with NA capex / fte / c_totlinc, even though
# rows with those values missing were filtered out earlier in the script.
# If the goal is only to attach industry columns, a left_join may be what is
# intended -- confirm before changing, since it alters the written output.
DnD_master_data <- full_join(
  bit_bas_payg_fj,
  industry_data,
  by = c("id", "tsid")
)

write_parquet(DnD_master_data, "P:/Company Tax/Data/EUM/DnD_eum_master_data.parquet")

# Stop the timer started by tic() at the top of the script and print the
# elapsed time
toc()













