Implied Probabilities and the IMV
Polytomous response models — including the graded response model (GRM), partial credit model (PCM), and sequential response model (SRM) — are each motivated by assumptions about specific dichotomizations of a polytomous outcome \(x\) scored in \(K\) ordered categories. The implied probabilities of a model are the probabilities associated with the full range of possible dichotomizations of \(x\), computed directly from the category response function (CRF).
A dichotomization is characterized by a target set \(T \subseteq \{0, \ldots, K-1\}\) and an excluded set \(E \subseteq \{0, \ldots, K-1\} \setminus T\). The implied probability is:
\[ \Pr\!\left(x \in T \;\middle|\; x \in T \cup E,\, \theta\right) = \frac{\sum_{k \in T} \Pr(x = k \mid \theta)}{\sum_{k \in T \cup E} \Pr(x = k \mid \theta)} \]
When \(E = \emptyset\) this reduces to the standard cumulative probability \(\Pr(x \in T \mid \theta)\).
While the CRFs from the GRM, PCM, and SRM are visually similar, many implied probabilities differ dramatically across models — they can be non-unit-spanning, asymmetric, or non-monotonic. The InterModel Vigorish (IMV) quantifies these predictive differences: an IMV of 0.01 corresponds to the expected profit (in dollars) from a $1 bet, comparable to the 2PL vs. 1PL predictive gain in IRT.
We illustrate three things here. In Tab 1, we show the category response functions for all three models (GRM, PCM, SRM) using the selected item parameters. In Tab 2, we show the implied probability curves for every possible dichotomization, comparing how the three models differ. In Tab 3, we compute IMV(DGM, DAM) — how much better the DGM’s predictions are relative to the DAM’s, across all dichotomizations — using a fresh set of out-of-sample responses.
Notes:
- The three models have canonical dichotomizations: adjacent categories for the PCM (\(T = \{k\}; E = \{k-1\}\)), cumulative for the GRM (\(T = \{k, \ldots, K-1\}; E = \emptyset\)), and sequential for the SRM (\(T = \{k, \ldots, K-1\}; E = \{0, \ldots, k-1\}\)).
- Cross-model implied probabilities can take on highly unusual shapes — non-unit-spanning, asymmetric, or non-monotonic — when a model is evaluated on a non-canonical dichotomization.
- The GRM requires strictly ordered thresholds (\(b_1 < b_2 < \cdots < b_{K-1}\)). If thresholds are disordered, GRM results will be suppressed. The PCM and SRM do not require ordered thresholds.
- In Tab 3, IMV(DGM, DAM) should generally be near or above zero when the DGM is correctly specified: the DGM’s predictions should improve on those of the misspecified DAM.
- Item parameters for both the DGM and DAM are estimated from \(N=2000\) simulated responses with known \(\theta\) values. A fresh set of \(N=2000\) responses is then generated from the DGM using its estimated parameters; IMV(DGM, DAM) is computed by comparing DGM predictions to DAM predictions on those new responses. Estimated parameters are used throughout — never the true ones.
#| '!! shinylive warning !!': |
#| shinylive does not work in self-contained HTML documents.
#| Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 1050
library(shiny)
## ── Model functions ──────────────────────────────────────────────────────────
getprob <- function(model, th, b, a) {
pcm <- function(th, b, a) {
K <- length(b) + 1
psi <- list()
psi[[1]] <- rep(1, length(th))
for (k in 1:(K - 1)) {
kern <- k * th - sum(b[1:k])
psi[[k + 1]] <- exp(a * kern)
}
psi <- do.call("cbind", psi)
psi / rowSums(psi)
}
grm <- function(th, b, a) {
invlogit <- function(z) 1 / (1 + exp(-z))
K <- length(b) + 1
pr <- lapply(1:(K - 1), function(i) invlogit(a * (th - b[i])))
pr <- do.call("cbind", pr)
pr <- cbind(1, pr, 0)
p <- lapply(1:K, function(i) pr[, i] - pr[, i + 1])
do.call("cbind", p)
}
srm <- function(th, b, a) {
invlogit <- function(z) 1 / (1 + exp(-z))
K <- length(b) + 1
pr <- lapply(1:(K - 1), function(i) invlogit(a * (th - b[i])))
pr <- do.call("cbind", pr)
p <- list()
for (i in 1:K) {
if (i == 1) tmp <- 1 - pr[, 1]
else if (i == K) tmp <- apply(pr, 1, prod)
else tmp <- apply(pr[, 1:(i - 1), drop = FALSE], 1, prod) * (1 - pr[, i])
p[[i]] <- tmp
}
do.call("cbind", p)
}
do.call(model, args = list(th = th, b = b, a = a))
}
simresp <- function(model, th, b, a) {
p <- getprob(model, th = th, b = b, a = a)
sapply(seq_along(th), function(i) which(rmultinom(1, 1, p[i, ])[, 1] > 0) - 1)
}
estfun <- function(model, resp, th, K) {
inits <- c(1, seq(-1, 1, length.out = K - 1))
f <- function(pars) {
p <- getprob(model, th = th, b = pars[-1], a = pars[1])
z <- mapply(function(i, j) p[i, j], seq_along(th), resp + 1)
-sum(log(pmax(z, 1e-10)))
}
optim(inits, fn = f)$par
}
## ── IMV ──────────────────────────────────────────────────────────────────────
imv.binary <- function(y, p1, p2, sigma = 1e-4) {
p1 <- pmin(pmax(p1, sigma), 1 - sigma)
p2 <- pmin(pmax(p2, sigma), 1 - sigma)
ll <- function(x, p) exp(mean(log(p) * x + log(1 - p) * (1 - x)))
getcoins <- function(a) {
f <- function(p, a) abs(p * log(p) + (1 - p) * log(1 - p) - log(a))
nlminb(0.5, f, lower = 0.001, upper = 0.999, a = a)$par
}
c1 <- getcoins(ll(y, p1))
c2 <- getcoins(ll(y, p2))
(c2 - c1) / c1
}
## ── Implied probabilities ────────────────────────────────────────────────────
dichotomize_probs <- function(p) {
K <- ncol(p)
dichotomize <- function(tag, exc = NULL, p) {
vals <- 0:(K - 1)
if (length(exc) > 0) vals <- vals[!(vals %in% exc)]
L <- lapply(seq_len(tag), function(i) vals)
z <- expand.grid(L)
if (ncol(z) > 1) {
del <- apply(z, 1, function(x) min(diff(x)))
z <- z[del > 0, , drop = FALSE]
}
if (2 * tag == length(vals)) z <- z[seq_len(nrow(z) / 2), , drop = FALSE]
om <- list()
for (i in seq_len(nrow(z))) {
p.star <- p[, unlist(z[i, ]) + 1, drop = FALSE]
p.den <- if (length(exc) > 0) 1 - rowSums(p[, as.numeric(exc) + 1, drop = FALSE]) else rowSums(p)
exc.text <- if (length(exc) == 0) "" else paste(exc, collapse = "/")
om[[paste0("T=", paste0(z[i, ], collapse = "/"), ";E=", exc.text)]] <- rowSums(p.star) / p.den
}
om
}
out <- list()
for (n.exc in 0:(K - 2)) for (n.tag in seq_len(floor((K - n.exc) / 2))) {
tmp <- list()
if (n.exc > 0) {
L <- lapply(seq_len(n.exc), function(i) 0:(K - 1))
z <- expand.grid(L)
if (ncol(z) > 1) {
del <- apply(z, 1, function(x) min(diff(x)))
z <- z[del > 0, , drop = FALSE]
}
for (i in seq_len(nrow(z))) tmp[[i]] <- dichotomize(tag = n.tag, exc = z[i, ], p = p)
} else {
tmp[[1]] <- dichotomize(tag = n.tag, p = p)
}
out[[paste(n.exc, n.tag)]] <- tmp
}
names(out) <- NULL
do.call("c", do.call("c", out))
}
## ── Testing pipeline ─────────────────────────────────────────────────────────
testing_pipeline <- function(resp, p1, p2) {
K <- ncol(p1)
dichotomize <- function(tag, exc = NULL, resp, p1, p2) {
vals <- 0:(K - 1)
if (length(exc) > 0) {
vals <- vals[!(vals %in% exc)]
keep <- !(resp %in% exc)
resp <- resp[keep]; p1 <- p1[keep, ]; p2 <- p2[keep, ]
}
L <- lapply(seq_len(tag), function(i) vals)
z <- expand.grid(L)
if (ncol(z) > 1) {
del <- apply(z, 1, function(x) min(diff(x)))
z <- z[del > 0, , drop = FALSE]
}
if (2 * tag == length(vals)) z <- z[seq_len(nrow(z) / 2), , drop = FALSE]
om <- list()
for (i in seq_len(nrow(z))) {
r0 <- as.integer(resp %in% unlist(z[i, ]))
cols <- unlist(z[i, ]) + 1
p1.star <- p1[, cols, drop = FALSE]
p2.star <- p2[, cols, drop = FALSE]
p1.den <- if (length(exc) > 0) 1 - rowSums(p1[, as.numeric(exc) + 1, drop = FALSE]) else rowSums(p1)
p2.den <- if (length(exc) > 0) 1 - rowSums(p2[, as.numeric(exc) + 1, drop = FALSE]) else rowSums(p2)
exc.text <- if (length(exc) == 0) "" else paste(exc, collapse = "/")
om[[paste0("T=", paste0(z[i, ], collapse = "/"), ";E=", exc.text)]] <-
imv.binary(r0, rowSums(p1.star) / p1.den, rowSums(p2.star) / p2.den)
}
om
}
out <- list()
for (n.exc in 0:(K - 2)) for (n.tag in seq_len(floor((K - n.exc) / 2))) {
tmp <- list()
if (n.exc > 0) {
L <- lapply(seq_len(n.exc), function(i) 0:(K - 1))
z <- expand.grid(L)
if (ncol(z) > 1) {
del <- apply(z, 1, function(x) min(diff(x)))
z <- z[del > 0, , drop = FALSE]
}
for (i in seq_len(nrow(z))) tmp[[i]] <- dichotomize(resp = resp, tag = n.tag, exc = z[i, ], p1 = p1, p2 = p2)
} else {
tmp[[1]] <- dichotomize(resp = resp, tag = n.tag, p1 = p1, p2 = p2)
}
out[[paste(n.exc, n.tag)]] <- tmp
}
names(out) <- NULL
unlist(do.call("c", out))
}
## ── UI ───────────────────────────────────────────────────────────────────────
ui <- fluidPage(
titlePanel("Implied probabilities and the IMV"),
sidebarLayout(
sidebarPanel(
h4("Item parameters"),
sliderInput("K", "Categories (K):", min = 3, max = 5, value = 3, step = 1),
sliderInput("a", "Discrimination (a):", min = 0.5, max = 2.5, value = 1.3, step = 0.1),
hr(),
h4("Threshold parameters"),
p("Each slider sets one threshold b\u2096. The GRM requires strictly increasing
thresholds; results for the GRM will be suppressed if thresholds are disordered.",
style = "font-size: 12px; color: #555;"),
uiOutput("threshold_sliders"),
uiOutput("threshold_warning"),
hr(),
h4("Model comparison"),
selectInput("dgm", "Data-generating model (DGM):",
choices = c("GRM" = "grm", "PCM" = "pcm", "SRM" = "srm"),
selected = "pcm"),
selectInput("dam", "Data-analysis model (DAM):",
choices = c("GRM" = "grm", "PCM" = "pcm", "SRM" = "srm"),
selected = "grm"),
hr(),
h4("Dichotomization (Tab 2)"),
uiOutput("dich_selector"),
hr(),
uiOutput("run_btn"),
hr(),
p("N = 2000 persons, \u03b8 ~ N(0,1). Both DGM and DAM parameters estimated from
first set of responses; IMV computed on a fresh set of responses from the
estimated DGM.",
style = "font-size: 12px; color: #888;")
),
mainPanel(
tabsetPanel(
## ── Tab 1: CRFs ────────────────────────────────────────────────────
tabPanel("Category response functions",
br(),
p("The plots below show the analytical category response functions (CRFs) —
Pr(x = k | \u03b8) for each category k — for all three models (GRM, PCM, SRM),
using the selected item parameters. These are not estimated from data; they
describe what each model predicts for a single item with known parameters.
Despite differing in construction, the three models produce strikingly similar
CRFs for typical parameter values, motivating examination of the full set of
implied probabilities (Tab 2) as a more sensitive discriminator.
If thresholds are disordered the GRM panel is suppressed."),
uiOutput("crf_warning"),
plotOutput("crf_plot", height = "360px")
),
## ── Tab 2: Implied probabilities ───────────────────────────────────
tabPanel("Implied probabilities",
br(),
p("Each curve shows an implied probability — Pr(x \u2208 T | x \u2208 T \u222a E, \u03b8) —
as a function of \u03b8, computed analytically from the CRFs of all three models
using the selected item parameters. Use the selector in the sidebar to explore
different dichotomizations. Unlike CRFs, implied probabilities can differ
sharply across models for the same dichotomization, and may be non-unit-spanning,
asymmetric, or non-monotonic when a model is evaluated on a non-canonical
dichotomization. The GRM curve is suppressed when thresholds are disordered."),
plotOutput("imp_plot", height = "380px")
),
## ── Tab 3: IMV ─────────────────────────────────────────────────────
tabPanel("IMV comparison",
br(),
p("Click 'Run simulation' to: (1) draw \u03b8 ~ N(0,1) for N = 2000 persons and
simulate item responses from the DGM; (2) estimate item parameters for both
the DGM and DAM from those responses using the known \u03b8 values; (3) generate
a fresh set of N = 2000 responses from the DGM using its estimated parameters;
(4) compute IMV(DGM, DAM) for every dichotomization by comparing DGM predictions
to DAM predictions on the new responses. Estimated parameters are used throughout.
IMV(DGM, DAM) \u2265 0 is expected when the DGM is correctly specified."),
uiOutput("imv_warning"),
plotOutput("imv_plot", height = "600px"),
hr(),
h4("Summary"),
verbatimTextOutput("imv_summary")
)
)
)
)
)
## ── Server ───────────────────────────────────────────────────────────────────
server <- function(input, output, session) {
th_grid <- seq(-3, 3, length.out = 500)
## ── Dynamic threshold sliders ─────────────────────────────────────────────
output$threshold_sliders <- renderUI({
K <- input$K
## default values: evenly spaced in (-1.5, 1.5)
def <- seq(-1.5, 1.5, length.out = K - 1)
lapply(seq_len(K - 1), function(k) {
## try to preserve current value if slider already exists
cur <- isolate(input[[paste0("b", k)]])
val <- if (!is.null(cur)) cur else def[k]
sliderInput(
inputId = paste0("b", k),
label = paste0("b\u2096 (k=", k, "):"),
min = -3, max = 3, value = val, step = 0.1
)
})
})
## Reactive: collect threshold vector from individual sliders
get_b <- reactive({
K <- input$K
sapply(seq_len(K - 1), function(k) {
v <- input[[paste0("b", k)]]
if (is.null(v)) 0 else v
})
})
## Reactive: is GRM valid (strictly ordered thresholds)?
grm_valid <- reactive({
b <- get_b()
length(b) < 2 || all(diff(b) > 0)
})
## Threshold warning in sidebar
output$threshold_warning <- renderUI({
if (!grm_valid())
div(style = "background:#f8d7da; border:1px solid #f5c6cb; border-radius:4px;
padding:8px 12px; margin-top:6px; font-size:12px;",
"\u26a0 Thresholds are not strictly increasing. GRM results suppressed.")
})
## CRF-tab warning
output$crf_warning <- renderUI({
if (!grm_valid())
div(style = "background:#f8d7da; border:1px solid #f5c6cb; border-radius:4px;
padding:8px 12px; margin-bottom:10px; font-size:13px;",
strong("\u26a0 GRM suppressed: "),
"thresholds must be strictly increasing for the GRM to be well-defined.")
})
## Analytical CRFs on theta grid
all_probs <- reactive({
b <- get_b()
a <- input$a
out <- list(
pcm = getprob("pcm", th_grid, b, a),
srm = getprob("srm", th_grid, b, a)
)
if (grm_valid())
out$grm <- getprob("grm", th_grid, b, a)
out
})
all_imp <- reactive({
probs <- all_probs()
lapply(probs, dichotomize_probs)
})
## Dichotomization names: use PCM (always available) as reference
dich_names <- reactive({
imp <- all_imp()
names(imp[["pcm"]])
})
output$dich_selector <- renderUI({
nms <- dich_names()
selectInput("sel_dich", NULL,
choices = setNames(seq_along(nms), nms),
selected = 1)
})
## Run button: disabled when DGM == DAM OR when DGM is GRM with disordered thresholds
output$run_btn <- renderUI({
disabled <- (input$dgm == input$dam) ||
(input$dgm == "grm" && !grm_valid())
reason <- if (input$dgm == input$dam) {
"DGM and DAM are the same model."
} else if (input$dgm == "grm" && !grm_valid()) {
"GRM requires ordered thresholds."
} else ""
if (disabled) {
tagList(
tags$button("Run simulation",
class = "btn btn-primary",
disabled = "disabled",
style = "width:100%; opacity:0.5; cursor:not-allowed;"),
if (nchar(reason) > 0)
p(reason, style = "font-size:12px; color:#a00; margin-top:4px;")
)
} else {
actionButton("run", "Run simulation", class = "btn-primary",
style = "width:100%;")
}
})
## ── Tab 1: CRFs ──────────────────────────────────────────────────────────
output$crf_plot <- renderPlot({
K <- input$K
probs <- all_probs()
valid <- grm_valid()
mods <- if (valid) c("grm","pcm","srm") else c("pcm","srm")
n_mods <- length(mods)
cat_cols <- c("#185FA5", "#D85A30", "#1D9E75", "#BA7517", "#A32D2D")
mod_labels <- c(grm = "GRM", pcm = "PCM", srm = "SRM")
b_str <- paste0("b = (", paste(round(get_b(), 1), collapse = ", "), ")")
par(mfrow = c(1, n_mods), mgp = c(2, 1, 0), mar = c(4, 3.5, 3, 1),
oma = c(0, 0, 1.5, 0), family = "serif")
for (mod in mods) {
p <- probs[[mod]]
plot(NULL, xlim = range(th_grid), ylim = 0:1,
xlab = expression(theta),
ylab = if (mod == mods[1]) expression(Pr(x == k ~ "|" ~ theta)) else "",
main = mod_labels[mod])
abline(h = 0.5, col = "gray80", lty = 2)
for (k in seq_len(K))
lines(th_grid, p[, k], col = cat_cols[k], lwd = 2)
if (mod == mods[1])
legend("right", bty = "n", lwd = 2, col = cat_cols[seq_len(K)],
legend = paste0("k=", 0:(K - 1)), cex = 0.85)
}
mtext(paste0("CRFs — a = ", input$a, ", ", b_str),
outer = TRUE, line = 0.3, cex = 0.95, family = "serif")
}, res = 96)
## ── Tab 2: Implied probability ───────────────────────────────────────────
output$imp_plot <- renderPlot({
req(input$sel_dich)
imp <- all_imp()
valid <- grm_valid()
idx <- as.integer(input$sel_dich)
nms <- dich_names()
if (idx > length(nms)) return(NULL)
nm <- nms[idx]
cols <- c(grm = "#185FA5", pcm = "#1D9E75", srm = "#D85A30")
lwds <- c(grm = 1, pcm = 2, srm = 3.5)
ltys <- c(grm = 1, pcm = 2, srm = 3)
par(mgp = c(2, 1, 0), mar = c(4, 4, 3, 1), family = "serif")
plot(NULL, xlim = range(th_grid), ylim = 0:1,
xlab = expression(theta),
ylab = "Pr(dichotomized = 1 | \u03b8)",
main = paste0("Implied probability: ", nm))
abline(h = 0.5, col = "gray80", lty = 2)
draw_mods <- if (valid) c("srm","pcm","grm") else c("srm","pcm")
for (mod in draw_mods) {
ip <- imp[[mod]]
if (!is.null(ip) && nm %in% names(ip))
lines(th_grid, ip[[nm]], col = cols[mod], lwd = lwds[mod], lty = ltys[mod])
}
leg_mods <- if (valid) c("grm","pcm","srm") else c("pcm","srm")
legend("right", bty = "n",
lwd = lwds[leg_mods],
lty = ltys[leg_mods],
col = cols[leg_mods],
legend = toupper(leg_mods), cex = 0.9)
}, res = 96)
## ── Tab 3: IMV simulation ─────────────────────────────────────────────────
output$imv_warning <- renderUI({
if (input$dgm == input$dam)
div(style = "background:#fff3cd; border:1px solid #ffc107; border-radius:4px;
padding:10px 14px; margin-bottom:12px; font-size:13px;",
strong("Note: "), "DGM and DAM are the same model. IMV(DGM, DAM) will be
exactly zero for every dichotomization — both models produce identical
predictions. Select different models to see meaningful IMV values.")
})
imv_results <- eventReactive(input$run, {
K <- input$K
a <- input$a
b <- get_b()
N <- 2000
withProgress(message = "Running simulation...", value = 0, {
incProgress(0.15, detail = "Simulating responses from DGM")
th <- rnorm(N)
resp <- simresp(input$dgm, th = th, b = b, a = a)
incProgress(0.25, detail = paste("Estimating", toupper(input$dgm), "parameters"))
est_dgm <- tryCatch(
estfun(input$dgm, resp, th, K),
error = function(e) c(a, b)
)
incProgress(0.25, detail = paste("Estimating", toupper(input$dam), "parameters"))
est_dam <- tryCatch(
estfun(input$dam, resp, th, K),
error = function(e) c(a, b)
)
incProgress(0.15, detail = "Generating fresh responses from DGM")
resp2 <- simresp(input$dgm, th = th, b = est_dgm[-1], a = est_dgm[1])
incProgress(0.1, detail = "Computing IMV")
p_dgm <- getprob(input$dgm, th, b = est_dgm[-1], a = est_dgm[1])
p_dam <- getprob(input$dam, th, b = est_dam[-1], a = est_dam[1])
## IMV(DGM, DAM): p1 = DAM (baseline), p2 = DGM (enhanced)
testing_pipeline(resp2, p_dam, p_dgm)
})
})
output$imv_plot <- renderPlot({
req(imv_results())
vals <- imv_results()
nms <- names(vals)
dgm_l <- toupper(input$dgm)
dam_l <- toupper(input$dam)
col_map <- c(grm = "#185FA5", pcm = "#1D9E75", srm = "#D85A30")
bar_col <- col_map[input$dgm]
b_str <- paste0("b = (", paste(round(get_b(), 1), collapse = ", "), ")")
par(mgp = c(2, 1, 0), mar = c(4, 10, 3, 2), family = "serif")
barplot(rev(as.numeric(vals)),
names.arg = rev(nms),
horiz = TRUE,
las = 1,
col = adjustcolor(bar_col, alpha.f = 0.7),
border = bar_col,
xlab = paste0("IMV(", dgm_l, ", ", dam_l, ")"),
main = paste0("IMV(", dgm_l, ", ", dam_l, ")",
" [K=", input$K, ", a=", input$a, ", ", b_str, "]"),
cex.names = 0.75,
cex.axis = 0.85)
abline(v = 0, col = "gray30", lty = 2, lwd = 1.5)
abline(v = 0.01, col = "gray60", lty = 3, lwd = 1.5)
legend("bottomright", bty = "n",
lty = c(2, 3), lwd = 1.5,
col = c("gray30", "gray60"),
legend = c("IMV = 0", "IMV = 0.01 benchmark"),
cex = 0.8)
}, res = 96)
output$imv_summary <- renderPrint({
req(imv_results())
vals <- as.numeric(imv_results())
dgm_l <- toupper(input$dgm)
dam_l <- toupper(input$dam)
b_str <- paste(round(get_b(), 2), collapse = ", ")
cat("IMV(", dgm_l, ", ", dam_l, ") across ", length(vals), " dichotomizations\n", sep = "")
cat("========================================\n\n")
cat("Parameters: K =", input$K, "| a =", input$a, "| b = (", b_str, ")\n\n")
cat(sprintf(" Max: %7.4f\n", max(vals)))
cat(sprintf(" Mean: %7.4f\n", mean(vals)))
cat(sprintf(" Median: %7.4f\n", median(vals)))
cat(sprintf(" Min: %7.4f\n", min(vals)))
cat(sprintf(" > 0: %d / %d dichotomizations\n", sum(vals > 0), length(vals)))
cat(sprintf(" > 0.01: %d / %d dichotomizations\n", sum(vals > 0.01), length(vals)))
})
}
shinyApp(ui = ui, server = server)