# Load packages ----
library(tidyverse)
library(data.table)
library(readr)
library(ggplot2)
library(scales)
library(dplyr)


# Import data ----
# Load the source extract from the shared R: drive; read_csv() guesses the
# column types, so every later step depends on those guesses.
Master_data <- read_csv(
  file = "R:/blade-standard/BIT (ABN)/CSV/bit_comp_bn_2223.csv"
)

# Clean variables and names ----

# ANZSIC division bands. Each of the first 18 breaks is the inclusive lower
# bound of divisions B..S (division A is everything below the first break).
# The final break, 99999, is the exclusive upper bound of division S, so codes
# >= 99999 receive no division (NA).
# NOTE(review): the exclusive 99999 bound is kept from the original logic;
# confirm whether code 99999 is meant to be left unclassified.
anzsic_division_breaks <- c(
  6000, 11000, 26000, 30000, 33000, 39000, 44000, 46000, 54000, 62000,
  66000, 69000, 72000, 75000, 80000, 84000, 89000, 94000, 99999
)
anzsic_division_codes <- LETTERS[1:19] # "A" .. "S"
anzsic_division_names <- c(
  "Agriculture, Forestry and Fishing",
  "Mining",
  "Manufacturing",
  "Electricity, Gas, Water and Waste Services",
  "Construction",
  "Wholesale Trade",
  "Retail Trade",
  "Accommodation and Food Services",
  "Transport, Postal and Warehousing",
  "Information Media and Telecommunications",
  "Financial and Insurance Services",
  "Rental, Hiring and Real Estate Services",
  "Professional, Scientific and Technical Services",
  "Administrative and Support Services",
  "Public Administration and Safety",
  "Education and Training",
  "Healthcare and Social Assistance",
  "Arts and Recreation Services",
  "Other Services + ATO use only"
)

# Map numeric ANZSIC codes to the matching entry of `labels` (one label per
# division, in A..S order). Missing codes and codes >= 99999 return NA.
anzsic_division_label <- function(code, labels) {
  # findInterval() counts how many breaks are <= code (bands are left-closed),
  # so adding 1 gives the division index; index 20 falls off the end -> NA.
  labels[findInterval(code, anzsic_division_breaks) + 1L]
}

Clean <- Master_data %>%
  mutate(
    # Treat blanks in the raw "c_" fields as zero. c_anzsic is excluded on
    # purpose: zero-filling it would turn a missing industry code into 0,
    # which falls in the first band and would be reported as Agriculture.
    across(c(starts_with("c_"), -c_anzsic), ~ ifelse(is.na(.x), 0, .x)),
    # Flag: 1 when a positive foreign share is reported, otherwise 0
    foreign_own = case_when(
      c_frgnshre > 0 ~ 1,
      c_frgnshre == 0 | is.na(c_frgnshre) ~ 0 # businesses can leave blank if not passed
    ),
    # Code 5 in c_base_rt_bus_cd maps to 0.25; every other value to 0.30
    taxrate = case_when(
      c_base_rt_bus_cd == 5 ~ 0.25,
      c_base_rt_bus_cd != 5 | is.na(c_base_rt_bus_cd) ~ 0.30
    ),
    total_income = c_totlinc,
    # Industry
    c_anzsic = as.numeric(c_anzsic),
    industry_division_name_BIT =
      anzsic_division_label(c_anzsic, anzsic_division_names),
    industry_division_div_BIT =
      anzsic_division_label(c_anzsic, anzsic_division_codes)
  )

# Define key NCT considerations ----

# Assign each entity to an income cohort, then drop the raw "c_" columns so
# only the derived variables are carried forward.
Clean <- Clean %>%
  mutate(
    cohort_income = case_when(
      # current base rate group
      taxrate == 0.25 ~ "1 - sub $50m",
      # full rate despite income under $50m (passive income)
      taxrate == 0.30 & total_income < 50e6 ~ "2 - sub $50m passive",
      # $1b base rate group
      taxrate == 0.30 & total_income >= 50e6 & total_income < 1e9 ~
        "3 - $50m to $1b",
      # remaining
      taxrate == 0.30 & total_income >= 1e9 ~ "4 - over $1b"
    )
  ) %>%
  select(-starts_with("c_")) # remove most uncleaned data

# Foreign ownership analysis ----
# Entity counts and foreign-owned counts per cohort and tax rate.
# summarise() peels off only the last grouping level, so the result stays
# grouped by cohort_income; the rounding below is row-wise and unaffected.
Foreign <- Clean %>%
  group_by(cohort_income, taxrate) %>%
  summarise(
    n = n(),
    foreign_owner_n = sum(foreign_own)
  ) %>%
  # Rule of 10: publish both counts rounded to the nearest ten
  mutate(across(c(n, foreign_owner_n), ~ round(.x, digits = -1)))

Foreign %>%
  write_csv("Foreign_ownership_FY23.csv")

# Foreign industry by cohort ----

# Share of the total of `x` contributed by its `k` largest values.
# NAs are ignored. Tied values count as separate contributors, so two
# entities with the same income are both included in a top-2 share.
# Returns NaN when `x` sums to zero, which includes an empty `x`.
top_k_share <- function(x, k) {
  x <- x[!is.na(x)]
  sum(head(sort(x, decreasing = TRUE), k)) / sum(x)
}

# Summarise entity counts, income totals and dominance ratios for `data`
# grouped by the columns given in `...`.
#
# `data` must contain cohort_income, total_income and foreign_own. Rows with
# missing total_income are dropped, and cohort_income2 (cohorts 3 and 4 merged
# into "3a - over $50m") is added before grouping so it can be used in `...`.
# Returns an ungrouped tibble: the grouping columns followed by
#   n, n_foreign_owner                      - entity counts
#   total_income_n, total_income_foreign_owner - income totals
#   total_income_1_50_check / _2_67_check   - top-1 / top-2 share, all entities
#   total_income_owner_1_50_check / _2_67_check - same, foreign-owned only
summarise_foreign_industry <- function(data, ...) {
  data %>%
    filter(!is.na(total_income)) %>%
    mutate(
      cohort_income2 = case_when(
        cohort_income == "1 - sub $50m" ~ "1 - sub $50m", # unchanged
        cohort_income == "2 - sub $50m passive" ~ "2 - sub $50m passive", # unchanged
        cohort_income == "3 - $50m to $1b" ~ "3a - over $50m", # combined
        cohort_income == "4 - over $1b" ~ "3a - over $50m" # combined
      )
    ) %>%
    group_by(...) %>%
    summarise(
      n = n(),
      n_foreign_owner = sum(foreign_own),

      total_income_n = sum(total_income),
      total_income_foreign_owner = sum(total_income[foreign_own == 1]),

      # Dominance checks
      total_income_1_50_check = top_k_share(total_income, 1),
      total_income_2_67_check = top_k_share(total_income, 2),
      total_income_owner_1_50_check =
        top_k_share(total_income[foreign_own == 1], 1),
      total_income_owner_2_67_check =
        top_k_share(total_income[foreign_own == 1], 2),
      .groups = "drop"
    )
}

# Full cohort detail by industry division
Foreign_industry_all <- summarise_foreign_industry(
  Clean,
  cohort_income, cohort_income2, taxrate,
  industry_division_div_BIT, industry_division_name_BIT
)

# Foreign ownership by industry, reduced cohorts: only the combined
# "3a - over $50m" rows are kept. cohort_income is set to NA to mark these
# rows as the aggregated version when the two tables are stacked.
Foreign_industry_over50 <- summarise_foreign_industry(
  Clean,
  cohort_income2, taxrate,
  industry_division_div_BIT, industry_division_name_BIT
) %>%
  filter(cohort_income2 == "3a - over $50m") %>%
  mutate(cohort_income = NA)


# Consolidate and apply the rule of 10 - foreign ownership
#
# Stacks the detailed and the aggregated industry tables, keeps only cells with
# at least 10 entities and 10 foreign-owned entities, then for each industry
# keeps either the two detailed over-$50m cohorts or the single aggregated row.
# NOTE(review): only total_income_foreign_owner is suppressed on dominance;
# total_income_n and the check ratios themselves are written out as-is.
# Confirm that is intended before release.
Foreign_industry_consolidated_owner <- Foreign_industry_all %>%
  rbind(Foreign_industry_over50) %>%
  filter(n >= 10, n_foreign_owner >= 10) %>%
  # Count surviving over-$50m rows per industry: 3 means both detailed cohorts
  # and the aggregate passed, 2 means one detailed cohort plus the aggregate.
  group_by(taxrate, industry_division_div_BIT, industry_division_name_BIT) %>%
  mutate(industry_n = sum(cohort_income2 == "3a - over $50m")) %>%
  ungroup() %>%
  # Extract appropriate level of disaggregation
  mutate(
    drop_row = case_when(
      # both detailed cohorts pass, so the aggregated row is redundant
      industry_n == 3 & is.na(cohort_income) ~ 1,
      # only one detailed cohort passes, so fall back to the aggregated row
      industry_n == 2 &
        cohort_income %in% c("3 - $50m to $1b", "4 - over $1b") ~ 1
    )
  ) %>%
  filter(is.na(drop_row)) %>%
  select(-drop_row, -industry_n) %>%
  mutate(
    # Blank the foreign-owner income where a dominance check fails; an NA
    # check result also yields NA here, so undetermined cells are suppressed.
    total_income_foreign_owner = ifelse(
      total_income_owner_1_50_check >= 0.5 |
        total_income_owner_2_67_check >= 0.67,
      NA,
      total_income_foreign_owner
    ),
    # Round to pass the rule of 10
    n = round(n, digits = -1),
    n_foreign_owner = round(n_foreign_owner, digits = -1)
  )

Foreign_industry_consolidated_owner %>%
  write_csv("Foreign_industry_ownership_FY23.csv")









