2PL versus 3PL predictions
Consider the IRT 3PL model: \[\Pr(y=1)=c+\frac{1-c}{1+\text{exp}(-a(\theta-b))}.\] The 3PL model is meant to account for guessing behavior via the \(c\) parameter but this parameter is also difficult to estimate. A different question is how well the 2PL model (where we constrain \(c=0\)) may do in terms of producing similar predictions as the 3PL. We illustrate this by simulating responses from the 3PL for a single item, estimating 3PL and 2PL item parameters based on those responses and true abilities, and then computing the IMV based on predictions of new responses.
We fix \(a\) and \(b\) in this example and consider the IMV as a function of either the \(c\) value (Tab 1) or the sample size (Tab 2). In both tabs the plotted quantity is IMV(2PL, 3PL) — that is, the predictive gain of the 3PL over the 2PL. Positive values mean the 3PL predicts new responses better; negative values mean the 2PL wins (typically due to overfitting the \(c\) parameter in small samples).
- For a large sample (\(N=5000\)) in Tab 1, note that the IMV increases as a function of \(c\) but never becomes very large (e.g., it stays below roughly 0.005). The IMV is also near zero when \(c\) is near zero, which is to be expected.
- In Tab 2, small values of \(c\) produce IMVs very near zero when the sample size is large and sometimes produce negative values when the sample size is small (i.e., overfitting of \(c\)). When \(c\) is larger, IMV values are more consistent but, again, never very large.
#| '!! shinylive warning !!': |
#| shinylive does not work in self-contained HTML documents.
#| Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 900
imv.binary <- function(y, p1, p2, sigma = 1e-4) {
  # InterModel Vigorish (IMV) for binary outcomes: the relative gain in
  # "coin" probability when moving from baseline predictions p1 to
  # enhanced predictions p2, evaluated on observed responses y.
  #
  # Clamp both prediction vectors into [sigma, 1 - sigma] so the logs
  # below stay finite even for degenerate probabilities of 0 or 1.
  p1 <- pmin(pmax(p1, sigma), 1 - sigma)
  p2 <- pmin(pmax(p2, sigma), 1 - sigma)

  # Geometric mean of the Bernoulli likelihood of outcomes x under
  # predictions p.
  geo_lik <- function(x, p) {
    exp(sum(log(p) * x + log(1 - p) * (1 - x)) / length(x))
  }

  # Invert the entropy relation: find the coin probability w whose
  # Bernoulli entropy matches log(a), i.e. w*log(w) + (1-w)*log(1-w) = log(a).
  entropy_coin <- function(a) {
    obj <- function(p, a) abs(p * log(p) + (1 - p) * log(1 - p) - log(a))
    nlminb(0.5, obj, lower = 0.001, upper = 0.999, a = a)$par
  }

  w1 <- entropy_coin(geo_lik(y, p1))
  w2 <- entropy_coin(geo_lik(y, p2))

  # Relative improvement of the enhanced coin over the baseline coin.
  (w2 - w1) / w1
}
library(shiny)
est <- function(x, th, fixc = FALSE) {
  # Maximum-likelihood item parameter estimation for a single item, with
  # abilities th treated as known.
  #
  # x    : binary response vector.
  # th   : ability (theta) vector, same length as x.
  # fixc : if TRUE fit the 2PL (c constrained to 0); otherwise fit the 3PL.
  #
  # Returns the estimated parameter vector: c(a, b) for the 2PL,
  # c(a, b, c) for the 3PL.
  nll <- function(pars, x, th) {
    a <- pars[1]
    b <- pars[2]
    g <- if (length(pars) == 3) pars[3] else 0  # guessing parameter
    p <- g + (1 - g) / (1 + exp(-a * (th - b)))
    # Keep probabilities strictly inside (0, 1) so the log-likelihood is finite.
    p <- pmax(pmin(p, 1 - 1e-10), 1e-10)
    -sum(x * log(p) + (1 - x) * log(1 - p))
  }
  # Bounded optimization: the original unconstrained Nelder-Mead search could
  # return c < 0 or c >= 1 (an invalid guessing probability) or a <= 0.
  if (fixc) {
    start <- c(1, 0)
    lo <- c(0.05, -6)
    hi <- c(10, 6)
  } else {
    start <- c(1, 0, 0.1)
    lo <- c(0.05, -6, 0)
    hi <- c(10, 6, 0.5)
  }
  optim(start, nll, x = x, th = th,
        method = "L-BFGS-B", lower = lo, upper = hi)$par
}
# UI definition: a sidebar holding the controls for both simulations and a
# main area with one tab per experiment. Assembled from named pieces inside
# local() so no extra globals leak into the session.
ui <- local({
  controls <- sidebarPanel(
    h4("Tab 1: Sensitivity to c"),
    sliderInput("n_persons_tab1", "Sample Size:",
                min = 1000, max = 10000, value = 5000, step = 1000),
    actionButton("simulate_tab1", "Generate New Data", class = "btn-primary"),
    hr(),
    h4("Tab 2: Sensitivity to Sample Size"),
    sliderInput("c_param_tab2", "Guessing Parameter (c):",
                min = 0, max = 0.5, value = 0.2, step = 0.01),
    actionButton("simulate_tab2", "Generate New Data", class = "btn-primary"),
    hr(),
    p("Fixed parameters: a=1.2, b=-0.5"),
    hr(),
    helpText("Adjust the controls above and click the relevant button to run each simulation.")
  )
  tab_c <- tabPanel(
    "Sensitivity to c",
    h3("IMV as a Function of c Parameter"),
    p("100 random c values sampled from Uniform(0, 0.5)"),
    plotOutput("sensitivity_c_plot", height = "500px"),
    hr(),
    h4("Summary Statistics"),
    verbatimTextOutput("summary_c_output")
  )
  tab_n <- tabPanel(
    "Sensitivity to Sample Size",
    h3("IMV as a Function of Sample Size"),
    p("Fixed c parameter, varying sample size"),
    plotOutput("sensitivity_n_plot", height = "500px"),
    hr(),
    h4("Summary Statistics"),
    verbatimTextOutput("summary_n_output")
  )
  fluidPage(
    titlePanel("Comparison of 2PL and 3PL predictions"),
    sidebarLayout(controls, mainPanel(tabsetPanel(tab_c, tab_n)))
  )
})
server <- function(input, output, session) {
  # Fixed item parameters for every simulation (also displayed in the sidebar).
  a_param <- 1.2
  b_param <- -0.5
  n_samples_c <- 100  # number of random c values drawn for Tab 1

  # Most recent simulation results for each tab.
  sim_data <- reactiveValues(sensitivity_c = NULL, sensitivity_n = NULL)

  # Tab 1: IMV(2PL, 3PL) as a function of the true guessing parameter c,
  # at the sample size currently selected in the sidebar.
  generate_data_c <- function() {
    withProgress(message = 'Generating c sensitivity data...', value = 0, {
      n <- input$n_persons_tab1
      th <- rnorm(n, mean = 0, sd = 1)
      set.seed(NULL)  # reseed from the clock so each click yields fresh draws
      c_values <- sort(runif(n_samples_c, 0, 0.5))
      imv_values <- numeric(n_samples_c)  # preallocate (no rbind in the loop)
      for (i in seq_along(c_values)) {
        incProgress(1 / n_samples_c,
                    detail = paste("Sample", i, "of", n_samples_c))
        c_val <- c_values[i]
        p_true <- c_val + (1 - c_val) / (1 + exp(-a_param * (th - b_param)))
        # Training responses and an independent holdout set are drawn from the
        # SAME 3PL model. (Bug fix: the holdout was previously generated once
        # with a fixed c = 0.2, mismatching the training value of c, so the
        # IMV was computed against data from a different model.)
        resp_train <- rbinom(n, 1, p_true)
        resp_holdout <- rbinom(n, 1, p_true)
        est3 <- est(th = th, x = resp_train)
        est2 <- est(th = th, x = resp_train, fixc = TRUE)
        p3 <- est3[3] + (1 - est3[3]) /
          (1 + exp(-est3[1] * (th - est3[2])))
        p2 <- 1 / (1 + exp(-est2[1] * (th - est2[2])))
        imv_values[i] <- imv.binary(resp_holdout, p2, p3)
      }
      sim_data$sensitivity_c <- data.frame(c = c_values, om = imv_values)
    })
  }

  # Tab 2: IMV(2PL, 3PL) as a function of sample size, at the c value
  # currently selected in the sidebar.
  generate_data_n <- function() {
    withProgress(message = 'Generating sample size sensitivity data...', value = 0, {
      n_values <- seq(500, 10000, by = 500)
      imv_values <- numeric(length(n_values))  # preallocate
      for (i in seq_along(n_values)) {
        incProgress(1 / length(n_values), detail = paste("n =", n_values[i]))
        n_val <- n_values[i]
        th <- rnorm(n_val, mean = 0, sd = 1)
        p_true <- input$c_param_tab2 + (1 - input$c_param_tab2) /
          (1 + exp(-a_param * (th - b_param)))
        resp_train <- rbinom(n_val, 1, p_true)    # used to fit both models
        resp_holdout <- rbinom(n_val, 1, p_true)  # used only for the IMV
        est3 <- est(th = th, x = resp_train)
        est2 <- est(th = th, x = resp_train, fixc = TRUE)
        p3 <- est3[3] + (1 - est3[3]) /
          (1 + exp(-est3[1] * (th - est3[2])))
        p2 <- 1 / (1 + exp(-est2[1] * (th - est2[2])))
        imv_values[i] <- imv.binary(resp_holdout, p2, p3)
      }
      sim_data$sensitivity_n <- data.frame(n = n_values, om = imv_values)
    })
  }

  observeEvent(input$simulate_tab1, { generate_data_c() })
  observeEvent(input$simulate_tab2, { generate_data_n() })

  # Text summary for Tab 1.
  output$summary_c_output <- renderPrint({
    req(sim_data$sensitivity_c)
    cat("IMV(2PL, 3PL) across 100 c values:\n")
    cat("====================================\n\n")
    vals <- sim_data$sensitivity_c$om
    cat(sprintf("Range: [%.4f, %.4f]\nMean: %.4f\nSD: %.4f\n",
                min(vals), max(vals), mean(vals), sd(vals)))
  })

  # Scatter plus lowess smooth of IMV against the true c (Tab 1).
  output$sensitivity_c_plot <- renderPlot({
    req(sim_data$sensitivity_c)
    par(mar = c(5, 5, 4, 2))
    plot(sim_data$sensitivity_c$c, sim_data$sensitivity_c$om,
         type = "n", ylim = c(-0.003, 0.003),
         xlab = "Guessing Parameter (c)", ylab = "IMV(2PL, 3PL)",
         main = paste("Sensitivity to c (n =", input$n_persons_tab1, ")"),
         cex.lab = 1.2)
    grid(col = "lightgray")
    abline(h = 0, lty = 2, col = "gray40")
    points(sim_data$sensitivity_c$c, sim_data$sensitivity_c$om,
           col = adjustcolor("steelblue", 0.5), pch = 16)
    lines(lowess(sim_data$sensitivity_c$c, sim_data$sensitivity_c$om, f = 0.3),
          col = "steelblue", lwd = 2)
    legend("topleft", legend = "IMV(2PL, 3PL)",
           col = "steelblue", lwd = 2, pch = 16, bg = "white", bty = "n")
  })

  # Text summary for Tab 2.
  output$summary_n_output <- renderPrint({
    req(sim_data$sensitivity_n)
    cat("IMV(2PL, 3PL) across sample sizes:\n")
    cat("====================================\n")
    cat(sprintf("c parameter: %.2f\n\n", input$c_param_tab2))
    vals <- sim_data$sensitivity_n$om
    cat(sprintf("Range: [%.4f, %.4f]\nMean: %.4f\nSD: %.4f\n",
                min(vals), max(vals), mean(vals), sd(vals)))
  })

  # Scatter plus lowess smooth of IMV against sample size (Tab 2).
  output$sensitivity_n_plot <- renderPlot({
    req(sim_data$sensitivity_n)
    par(mar = c(5, 5, 4, 2))
    plot(sim_data$sensitivity_n$n, sim_data$sensitivity_n$om,
         type = "n", ylim = c(-0.003, 0.003),
         xlab = "Sample Size", ylab = "IMV(2PL, 3PL)",
         main = paste("Sensitivity to Sample Size (c =", input$c_param_tab2, ")"),
         cex.lab = 1.2)
    grid(col = "lightgray")
    abline(h = 0, lty = 2, col = "gray40")
    points(sim_data$sensitivity_n$n, sim_data$sensitivity_n$om,
           col = adjustcolor("steelblue", 0.6), pch = 16, cex = 1.5)
    lines(lowess(sim_data$sensitivity_n$n, sim_data$sensitivity_n$om, f = 0.3),
          col = "steelblue", lwd = 2)
    legend("topleft", legend = "IMV(2PL, 3PL)",
           col = "steelblue", lwd = 2, pch = 16, bg = "white", bty = "n")
  })
}
# Launch the application: wire the UI definition to the server logic.
shinyApp(ui = ui, server = server)