Simulate response patterns under the framework of IRT

Using mirt package

Simulation

Mirt

Author

Jihong Zhang

Published

April 15, 2024

1 Research question:

How different methods of cutting item response affecting parameter estimation accuracy?
- Merge first two categories
- Merge the middle category into the neighboring category

We need to pre-specify a (item slopes) and b (item intercepts). mu (factor mean) and sigma (factor correlation) are optional.

library(mirt)
# Simulation for 6 5-category items
set.seed(1234)
J = 6 # number of items
N = 1000 # number of participants
a <- matrix(rlnorm(J, mean = 0, sd = 1), ncol = 1)
a

           [,1]
[1,] 0.29907355
[2,] 1.31973273
[3,] 2.95778648
[4,] 0.09578035
[5,] 1.53591253
[6,] 1.65873604

# for the graded model, ensure that there is enough space between the intercepts,
# otherwise closer categories will not be selected often (minimum distance of 0.3 here)
diffs <- t(apply(matrix(runif(J*4, .3, 1), nrow = J), 1, cumsum))
d <- -diffs + rnorm(J) + 4
d

         [,1]     [,2]     [,3]      [,4]
[1,] 2.664915 2.234209 1.781049 1.1617851
[2,] 5.469432 5.006874 4.139455 3.6538239
[3,] 3.629467 3.107838 2.439850 1.9265796
[4,] 2.623207 2.111322 1.171061 0.5159463
[5,] 3.059096 2.647764 1.765822 1.3390547
[6,] 3.972815 3.644818 3.312779 2.4810091

2 Data generation

2.1 Model 1: population Data

## Population data: 5-category: 0-4
dat1 <- simdata(a, d, N, itemtype = rep('graded', 6))
head(dat1)

     Item_1 Item_2 Item_3 Item_4 Item_5 Item_6
[1,]      4      4      0      2      2      3
[2,]      4      4      0      3      1      1
[3,]      4      4      4      3      4      4
[4,]      4      4      4      4      3      4
[5,]      4      4      2      2      4      4
[6,]      4      4      0      2      4      4

apply(dat1, 2, min)

Item_1 Item_2 Item_3 Item_4 Item_5 Item_6 
     0      0      0      0      0      0

apply(dat1, 2, max)

Item_1 Item_2 Item_3 Item_4 Item_5 Item_6 
     4      4      4      4      4      4

2.2 Model 2: 4-category data with first two categories merged

## Data2: Merged-category data: 4-category, 0-1->0,2->1,3->2,4->3
dat2 <- dat1
dat2[dat2%in%0:1] <- 0
dat2[dat2==2] <- 1
dat2[dat2==3] <- 2
dat2[dat2==4] <- 3
apply(dat2, 2, min)

Item_1 Item_2 Item_3 Item_4 Item_5 Item_6 
     0      0      0      0      0      0

apply(dat2, 2, max)

Item_1 Item_2 Item_3 Item_4 Item_5 Item_6 
     3      3      3      3      3      3

2.3 Model 3: 4-category data with middle 2 categories merged

## Data3: Merged-category data: 4-category, 0->0,1-2->1,3->2,4->3
dat3 <- dat1
dat3[dat3%in%1:2] <- 1
dat3[dat3==3] <- 2
dat3[dat3==4] <- 3
apply(dat3, 2, min)

Item_1 Item_2 Item_3 Item_4 Item_5 Item_6 
     0      0      0      0      0      0

apply(dat3, 2, max)

Item_1 Item_2 Item_3 Item_4 Item_5 Item_6 
     3      3      3      3      3      3

3 Data analysis

## Population model
mod1 <- mirt(dat1, model = 1)
mod1_param <- coef(mod1, simplify = TRUE)$items
mod1_itemfit <- itemfit(mod1)

## Tail merge model: model2
mod2 <- mirt(dat2, model = 1)
mod2_param <- coef(mod2, simplify = TRUE)$items
mod2_itemfit <- itemfit(mod2)

## Middle category merge model: model3
mod3 <- mirt(dat3, model = 1)
mod3_param <- coef(mod3, simplify = TRUE)$items
mod3_itemfit <- itemfit(mod3)

4 Parameter recovery

         [,1]     [,2]     [,3]      [,4]
[1,] 2.664915 2.234209 1.781049 1.1617851
[2,] 5.469432 5.006874 4.139455 3.6538239
[3,] 3.629467 3.107838 2.439850 1.9265796
[4,] 2.623207 2.111322 1.171061 0.5159463
[5,] 3.059096 2.647764 1.765822 1.3390547
[6,] 3.972815 3.644818 3.312779 2.4810091

kableExtra::kable(mod1_param[,2:5], digits = 3)

	d1	d2	d3	d4
Item_1	2.736	2.385	1.904	1.305
Item_2	4.859	4.593	3.841	3.362
Item_3	3.313	2.689	2.066	1.585
Item_4	2.701	2.203	1.191	0.533
Item_5	2.984	2.647	1.815	1.395
Item_6	3.998	3.611	3.188	2.445

rev_bias <- function(true, est) {
  diffs = abs(true - est)
  mean(diffs)
}

rmse <- function(true, est) {
  sqrt(mean((true - est)^2))
}


Results <- data.frame(
  Bias = c(
    rev_bias(true = as.numeric(a), est = mod1_param[,'a1']),
    rev_bias(true = as.numeric(a), est = mod2_param[,'a1']),
    rev_bias(true = as.numeric(a), est = mod3_param[,'a1'])
  ),
  RMSE = c(
    rmse(true = as.numeric(a), est = mod1_param[,'a1']),
    rmse(true = as.numeric(a), est = mod2_param[,'a1']),
    rmse(true = as.numeric(a), est = mod3_param[,'a1'])
  )
)
rownames(Results) <- paste0("Model ", 1:3)
kableExtra::kable(Results, digits = 3)

	Bias	RMSE
Model 1	0.138	0.181
Model 2	0.148	0.192
Model 3	0.144	0.199

5 Item fit

Model 1 (population model) seems to have overall best item fits. Model 2 (tail merged model) seems to have most worst-fitting items. Model 3 (middle-range merged model) seems to have the worst-fitting item (item 6).

RMSEA_mat <- rbind(
  mod1 = mod1_itemfit$RMSEA.S_X2,
  mod2 = mod2_itemfit$RMSEA.S_X2,
  mod3 = mod3_itemfit$RMSEA.S_X2
)
colnames(RMSEA_mat) <- paste0('Item', 1:6)
rownames(RMSEA_mat) <- paste0('Model ', 1:3)
kableExtra::kable(RMSEA_mat, digits = 3)

	Item3	Item4	Item5	Item6
Model 1	0.007	0.000	0.000	0.015
Model 2	0.000	0.014	0.014	0.018
Model 3	0.000	0.000	0.000	0.026

6 Global fit

M2_df <- rbind(
  M2(mod1, "C2"),
  M2(mod2, "C2"),
  M2(mod3, "C2")
)
rownames(M2_df) <- paste0("Model ", 1:3)
kableExtra::kable(M2_df, digits = 3)

	M2	df	p	RMSEA	RMSEA_95	SRMSR	TLI	CFI
Model 1	7.446	9	0.591	0.000	0.031	0.024	1.006	1.000
Model 2	9.723	9	0.373	0.009	0.037	0.027	0.997	0.998
Model 3	9.265	9	0.413	0.005	0.036	0.027	0.999	0.999

7 MCMC simulation

library(parSim)
Results <- parSim(
  J = c(6, 10, 20),
  N = c(300, 500, 1000),
  # Number of repititions?
  reps = 100, # more is better!
  
  # Parallel?
  nCores = 8,
  export = c("rev_bias"),
  expression = {
      library(mirt)
      a <- matrix(rlnorm(J, mean = 0, sd = 1), ncol = 1)
      diffs <- t(apply(matrix(runif(J*4, .3, 1), nrow = J), 1, cumsum))
      d <- -diffs + rnorm(J) + 4
      dat1 <- simdata(a, d, N, itemtype = rep('graded', J))
      dat2 <- dat1
      dat2[dat2%in%0:1] <- 0
      dat2[dat2==2] <- 1
      dat2[dat2==3] <- 2
      dat2[dat2==4] <- 3
      dat3 <- dat1
      dat3[dat3%in%1:2] <- 1
      dat3[dat3==3] <- 2
      dat3[dat3==4] <- 3
      ## Population model
      mod1 <- mirt(dat1, model = 1)
      mod1_param <- coef(mod1, simplify = TRUE)$items
      mod1_gfi <- M2(mod1, "C2")
      
      ## Tail merge model: model2
      mod2 <- mirt(dat2, model = 1)
      mod2_param <- coef(mod2, simplify = TRUE)$items
      mod2_gfi <- M2(mod2, "C2")
      
      ## middle category merge model: model3
      mod3 <- mirt(dat3, model = 1)
      mod3_param <- coef(mod3, simplify = TRUE)$items
      mod3_gfi <- M2(mod3, "C2")
      ## output
      data.frame(
       bias_mod1 = rev_bias(true = as.numeric(a), est = mod1_param[,'a1']),
       bias_mod2 = rev_bias(true = as.numeric(a), est = mod2_param[,'a1']),
       bias_mod3 = rev_bias(true = as.numeric(a), est = mod3_param[,'a1']),
       RMSEA_mod1 = mod1_gfi$RMSEA,
       RMSEA_mod2 = mod2_gfi$RMSEA,
       RMSEA_mod3 = mod3_gfi$RMSEA,
       M2_mod1 = mod1_gfi$M2,
       M2_mod2 = mod2_gfi$M2,
       M2_mod3 = mod3_gfi$M2,
       SRMSR_mod1 = mod1_gfi$SRMSR,
       SRMSR_mod2 = mod2_gfi$SRMSR,
       SRMSR_mod3 = mod3_gfi$SRMSR,
       TLI_mod1 = mod1_gfi$TLI,
       TLI_mod2 = mod2_gfi$TLI,
       TLI_mod3 = mod3_gfi$TLI
      )
  }
)

library(ggplot2)
library(dplyr)
library(tidyr)
Results_plot <- Results |> 
  pivot_longer(c(starts_with("bias"), 
                 starts_with("RMSEA"), 
                 starts_with("SRMSR"), 
                 starts_with("TLI")), 
               names_to = 'Model', values_to = 'Value') |> 
  separate(Model, into = c("Measure", "Model")) |> 
  group_by(J, N, Model, Measure) |> 
  summarise(Value = mean(Value, na.rm = T)) |> 
  mutate(N = factor(N, levels = c(300, 500, 1000)))

Results_plot |> 
  ggplot() +
  geom_path(aes(x = N, y = Value, col = Model, group = Model)) +
  geom_point(aes(x = N, y = Value, col = Model)) +
  facet_grid(Measure ~ J, scales = "free") +
  theme_bw()

8 Conclusion

There are not much differences in global model fits regarding varied cutting methods of Likert-scale item responses under the conditions in the simulation.
Increases of test length and sample sizes will increase estimation accuracy for both cutting methods.
Item fits will differ by varied cutting methods.
Goodness-of-fit may not perform well evaluating cutting methods of item responses.