## File Name: residif.R
## File version: 1.0

#' RESIDIF procedure for DIF detection as per Andrich and Hagquist (2015)
#'
#' This function detects DIF on PCM items using ANOVA of person-item residuals
#'
#' @param df data.frame containing the data
#' @param items vector containing the names of columns where item responses are stored in df
#' @param grp vector containing the name of the column where an optional group membership variable is stored in df
#' @param method.theta string determining the estimation method for individual latent variable values. Either "eap", "mle" or "wle"
#' @param verbose set to TRUE to print a detailed output, FALSE otherwise
#' @return A data.frame containing a column listing the detected DIF item and another listing detected DIF forms
#' @import vcrpart
#' @import PP
#' @export

residif <- function(df=NULL,items=NULL,grp=NULL,method.theta="eap",verbose=T) {
  if (any(!(items %in% colnames(df)))) {
    stop("ERROR: provided item name does not exist in df")
  }
  if (any(!(grp %in% colnames(df)))) {
    stop("ERROR: provided grp variable name does not exist in df")
  }
  if (any(is.null(grp))) {
    stop("ERROR: no grp variable provided")
  }
  if (any(is.null(items))) {
    stop("ERROR: no items provided")
  }

  maxcat <- max(df[,items])
  nbitems <- length(items)
  nbitems_o <- nbitems

  if (verbose) {
    cat('\n')
    cat("#################################################################################################\n")
    cat("##################################### COMPUTING INITIAL PCM #####################################\n")
    cat("#################################################################################################\n")
  }
  startt <- Sys.time()
  pcm_initial <- pcm(df = df,items = items,grp = grp,verbose=F,method.theta = method.theta)
  dat <- df
  dat$score <- rowSums(dat[,items])
  nqt <- ifelse(length(unique(quantile(dat$score,seq(0,1,0.2))))==6,5,length(unique(quantile(dat$score,seq(0,1,0.2))))-1)
  while (length(unique(quantile(dat$score,seq(0,1,1/nqt))))!=nqt+1) {
    nqt <- nqt-1
  }
  dat$score_q5 <- cut(dat$score,unique(quantile(dat$score,seq(0,1,1/nqt))),labels=1:nqt,include.lowest=T)
  res.anova <- rep(NA,nbitems)
  pval <- rep(NA,nbitems)
  fval <- rep(NA,nbitems)
  for (i in 1:nbitems) {
    dat[,paste0('res_',i)] <- pcm_initial$residuals[,i]
    res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~get(grp)*score_q5,data=dat))
    pval[c(i,i+nbitems)] <- c(res.anova[[i]][1,"Pr(>F)"],res.anova[[i]][3,"Pr(>F)"])
    fval[c(i,i+nbitems)] <- c(res.anova[[i]][1,'F value'],res.anova[[i]][3,"F value"])
  }
  if (verbose) {
    cat('DONE\n')
    cat('#################################################################################################\n')
  }
  res.items <- c()
  res.uniform <- c()
  resp <- df[,items]
  k <- 1
  while(any(pval<0.05/(nbitems_o*3))) {
    k <- k+1
    if (verbose) {
      cat(paste("######################################## COMPUTING STEP",k,"#######################################\n"))
      cat("#################################################################################################\n")
    }
    numitem <- ifelse(which.max(fval)%%(length(fval)/2)!=0,which.max(fval)%%(length(fval)/2),length(fval)/2)
    res.item <- gsub("[a-z]", "",colnames(resp)[numitem])
    res.items <- c(res.items,res.item)
    res.uni <- res.anova[[numitem]][3,"Pr(>F)"]>0.05
    res.uniform <- c(res.uniform,res.uni)
    pcm_while <- pcm(df = df,items = items,grp = grp,dif.items = res.items,type.dif = res.uniform,verbose=F,method.theta = method.theta)
    res.anova <- rep(NA,nbitems)
    pval <- rep(NA,nbitems_o)
    fval <- rep(NA,nbitems_o)
    numitems <- 1:nbitems_o
    numitems <- numitems[-which(numitems%in%res.items)]
    for (i in numitems) {
      dat[,paste0('res_',i)] <- pcm_while$residuals[,i]
      res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~dat[,grp]*score_q5,data=dat))
      pval[c(i,i+nbitems)] <- c(res.anova[[i]][1,"Pr(>F)"],res.anova[[i]][3,"Pr(>F)"])
      fval[c(i,i+nbitems)] <- c(res.anova[[i]][1,'F value'],res.anova[[i]][3,"F value"])
    }
    for (i in 1:nbitems_o) {
      pval[i] <- ifelse(is.na(pval[i]),999,pval[i])
      fval[i] <- ifelse(is.na(fval[i]),-999,fval[i])
    }
    if (verbose) {
      cat('DONE\n')
      if (any(pval<0.05/(nbitems_o*3))) {
        cat('#################################################################################################\n')
      }
    }
  }
  endt <- Sys.time()
  cat(paste(c('Algorithm ran for',round(endt-startt,4),"seconds\n")))
  if (verbose) {
    cat('#################################################################################################\n')
    cat("###################################### DETECTED DIF ITEMS #######################################\n")
    cat("#################################################################################################\n")
  }
  if (length(res.items>0)) {
    results <- data.frame(dif.items=res.items,
                          uniform=ifelse(res.uniform==1,TRUE,FALSE))
    return(results)
  }
  else {
    if (verbose) {
      cat("No DIF was detected\n")
    }
    return(NULL)
  }
}