##############################################################################
#----------------------------------------------------------------------------#
############################# DATA TRANSFORMATION ############################
#----------------------------------------------------------------------------#
##############################################################################

# Import ROSALI and RESALI

# Read the ROSALI-DIF and RESALI result sheets (both paths point at the
# N300 / 6A_300_original.xls files). Later code reads the columns
# dif_detect_1..4 and real_dif_1 from these tables.
# read_excel() is namespace-qualified because no library(readxl) call is
# visible in this script; unqualified, the call fails in a fresh R session.
ros_mdc <- readxl::read_excel("/home/corentin/Documents/These/Recherche/Simulations/Analysis/ROSALI-DIF/N300/6A_300_original.xls")
res_mdc <- readxl::read_excel("/home/corentin/Documents/These/Recherche/Simulations/Analysis/RESALI/Results/N300/6A_300_original.xls")


# Perform MH

library(difR)

# Item responses (item1..item4), replication index and grouping variable (TT)
# used for the Mantel-Haenszel DIF analysis.
dat_mh <- read.csv('/home/corentin/Documents/These/Recherche/Simulations/Data/DIF/N300/scenario_6A_300.csv')[
  , c("item1", "item2", "item3", "item4", "replication", "TT")
]

# det_mh stores 4 consecutive logicals per replication (items 1-4, in order):
# TRUE when difMH() lists that item in $DIFitems.
# The vector is preallocated; growing it with c() copies it on every iteration.
det_mh <- logical(4 * 1000)
for (k in 1:1000) {
  # NOTE(review): with 1000 iterations this condition is only met at k == 1000,
  # so the progress line prints once at the very end. A smaller modulus may
  # have been intended; left unchanged here.
  if (k %% 1000 == 0) {
    cat(paste0(k, '/1000\n'))
  }
  # Rows of the current replication, items plus the group column only.
  dat_mh_temp <- dat_mh[
    dat_mh$replication == k,
    c("item1", "item2", "item3", "item4", "TT")
  ]
  # exact = FALSE is spelled out: F is an ordinary variable that can be
  # reassigned, FALSE is a reserved word.
  aa <- difMH(Data = dat_mh_temp, group = "TT", focal.name = 0, exact = FALSE)
  # Slots 4*(k-1)+1 .. 4*k belong to replication k.
  det_mh[4 * (k - 1) + 1:4] <- 1:4 %in% aa$DIFitems
}

# Create 1 line per item per replication in df
library(tidyr)

# Build a 1000 x 4 data frame of logicals from a results table: cell
# (x, k) is TRUE when item index k is matched by `%in%` against the values
# of columns `cols` in row x of `results`.
flag_items <- function(results, cols) {
  per_item <- vapply(
    1:4,
    function(k) {
      vapply(1:1000, function(x) k %in% results[x, cols], logical(1))
    },
    logical(1000)
  )
  as.data.frame(per_item)
}

# Detected items per replication for ROSALI (da) and RESALI (db), and the
# reference DIF item taken from the RESALI table (dc).
da <- flag_items(ros_mdc, paste0("dif_detect_", 1:4))
db <- flag_items(res_mdc, paste0("dif_detect_", 1:4))
# NOTE(review): only the column real_dif_1 is consulted here, whereas four
# dif_detect_* columns are used above -- confirm that a single true-DIF
# column is intended for this scenario.
dc <- flag_items(res_mdc, paste0("real_dif_", 1))

# Reshape each 1000 x 4 table to long format (one row per replication and
# item; the flags end up in the second column, `value`).
data_mdca <- pivot_longer(data.frame(rosali = da), cols = 1:4)
data_mdcb <- pivot_longer(data.frame(resali = db), cols = 1:4)
data_mdcc <- pivot_longer(data.frame(real = dc), cols = 1:4)

# Put the three long tables side by side and keep only their value columns
# (positions 2, 4 and 6), renamed after the source they came from.
data_mdc <- cbind(data_mdca, data_mdcb, data_mdcc)[, c(2, 4, 6)]
names(data_mdc) <- c("rosali", "resali", "real")

# Repeat every element of `kk` four times, keeping the input order, e.g.
# make_repl(1:2) gives 1 1 1 1 2 2 2 2.
#
# kk: atomic vector of values to repeat (the script passes replication ids).
# Returns a vector of length 4 * length(kk), or NULL when `kk` is empty
# (the value the loop-based version produced for empty input).
make_repl <- function(kk) {
  if (length(kk) == 0L) {
    return(NULL)
  }
  # rep(each = ) replaces appending with c() inside a loop, which copied the
  # whole result on every iteration. as.vector() drops names and other
  # attributes, as iterating over `kk` with `for` did.
  rep(as.vector(kk), each = 4)
}

# Attach the Mantel-Haenszel detection flags computed earlier (det_mh).
data_mdc[["mh"]] <- det_mh

# Attach the replication index: make_repl() repeats each of 1..1000 four
# times, i.e. one entry per row in blocks of 4.
data_mdc[["replication"]] <- make_repl(1:1000)


##############################################################################
#----------------------------------------------------------------------------#
########################### FIT DIF DETECTION MODEL ##########################
#----------------------------------------------------------------------------#
##############################################################################

# Fit TAN model



# Fit logistic model, stratified on replication

# Logistic regression of the reference DIF flag on the ROSALI and RESALI
# detection flags, fitted on rows 1-2000 of data_mdc.
# NOTE(review): the heading above says "stratified on replication", but the
# formula contains no replication term -- confirm the intended model.
mod_glm <- glm(
  formula = real ~ rosali + resali,
  data = data_mdc[1:2000, ],
  family = binomial()
)

# Validation rows start at 2001 so that row 2000, already used for fitting,
# is not also scored as held-out data (training and validation sets stay
# disjoint).
data_valid <- data_mdc[2001:4000, ]

# predict() is called without `type`, so the stored values are on the
# linear-predictor (logit) scale, not probabilities.
data_valid$predict <- predict(mod_glm, newdata = data_valid)

# ROC curve of the linear predictor against the reference flag on the
# validation rows.
roc_c <- pROC::roc(response = data_valid$real, predictor = data_valid$predict)

# Flag every row of data_mdc whose linear predictor reaches the cut-off.
# NOTE(review): -0.6275167 is hard-coded; presumably it was read off roc_c --
# confirm and, if so, derive it from the ROC object instead.
data_mdc$logit_pred <- predict(mod_glm, newdata = data_mdc) >= -0.6275167

# perf_moreflex[k] is TRUE when every row of replication k with real == TRUE
# also has logit_pred == TRUE (vacuously TRUE when the replication has no
# real == TRUE row).
# The vector is preallocated instead of being grown with c(), and the flags
# are compared directly rather than through row-name sets.
perf_moreflex <- logical(1000)
for (k in 1:1000) {
  # Rows 4*(k-1)+1 .. 4*k hold the four items of replication k.
  dattt <- data_mdc[4 * (k - 1) + 1:4, ]
  truly_dif <- dattt$real == TRUE
  perf_moreflex[k] <- all(dattt$logit_pred[truly_dif])
}

##############################################################################
#----------------------------------------------------------------------------#
######################## FIT UNIFORMITY DETECTION MODEL ######################
#----------------------------------------------------------------------------#
##############################################################################