% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mdyplFit.R, R/zzz_conventions.R
\name{mdyplFit}
\alias{mdyplFit}
\alias{mdypl_fit}
\title{Fitting function for \code{\link[=glm]{glm()}} for maximum Diaconis-Ylvisaker prior
penalized likelihood estimation of logistic regression models}
\usage{
mdyplFit(
  x,
  y,
  weights = rep(1, nobs),
  start = NULL,
  etastart = NULL,
  mustart = NULL,
  offset = rep(0, nobs),
  family = binomial(),
  control = list(),
  intercept = TRUE,
  singular.ok = TRUE
)

mdypl_fit(
  x,
  y,
  weights = rep(1, nobs),
  start = NULL,
  etastart = NULL,
  mustart = NULL,
  offset = rep(0, nobs),
  family = binomial(),
  control = list(),
  intercept = TRUE,
  singular.ok = TRUE
)
}
\arguments{
\item{x}{a design matrix of dimension \code{n * p}.}

\item{y}{a vector of observations of length \code{n}.}

\item{weights}{an optional vector of \sQuote{prior weights} to be used
    in the fitting process.  Should be \code{NULL} or a numeric vector.}

\item{start}{starting values for the parameters in the linear predictor.}

\item{etastart}{starting values for the linear predictor.}

\item{mustart}{starting values for the vector of means.}

\item{offset}{this can be used to specify an \emph{a priori} known
    component to be included in the linear predictor during fitting.
    This should be \code{NULL} or a numeric vector of length equal to
    the number of cases.  One or more \code{\link[stats]{offset}} terms can be
    included in the formula instead or as well, and if more than one is
    specified their sum is used.  See \code{\link[stats]{model.offset}}.}

\item{family}{a description of the error distribution and link
    function to be used in the model.  For \code{glm} this can be a
    character string naming a family function, a family function or the
    result of a call to a family function.  For \code{glm.fit} only the
    third option is supported.  (See \code{\link[stats]{family}} for details of
    family functions.)}

\item{control}{a list of parameters controlling the fitting
process. See \code{\link[=mdyplControl]{mdyplControl()}} for details.}

\item{intercept}{logical. Should an intercept be included in the
    \emph{null} model?}

\item{singular.ok}{logical; if \code{FALSE} a singular fit is an
    error.}
}
\value{
An object inheriting from class \code{\link[=mdyplFit]{"mdyplFit"}}, which
is a list having the same elements as the list that
\code{\link[stats:glm]{stats::glm.fit()}} returns, with a few extra elements.
}
\description{
\code{\link[=mdyplFit]{mdyplFit()}} is a fitting method for \code{\link[=glm]{glm()}} that fits logistic
regression models using maximum Diaconis-Ylvisaker prior penalized
likelihood estimation.
}
\details{
\code{\link[=mdyplFit]{mdyplFit()}} uses \code{\link[stats:glm]{stats::glm.fit()}} to fit a logistic regression
model on responses \code{alpha * y + (1 - alpha) / 2}, where \code{y} are the
original binomial responses scaled by the binomial totals. This is
equivalent to penalizing the likelihood by the Diaconis-Ylvisaker
prior with shrinkage parameter \eqn{\alpha} and regression parameters
set to zero. See Rigon & Aliverti (2023) and Sterzinger & Kosmidis
(2024).

By default, \code{alpha = n / (p + n)} is used, where \code{n} is the sum of
the binomial totals. Alternative values of \code{alpha} can be passed to
the \code{control} argument; see \code{\link[=mdyplControl]{mdyplControl()}} for setting up the
list passed to \code{control}. If \code{alpha = 1} then \code{\link[=mdyplFit]{mdyplFit()}} will
simply do maximum likelihood estimation.

Note that \code{null.deviance}, \code{deviance} and \code{aic} in the resulting
object are computed at the adjusted responses. Hence, methods such
as \code{\link[stats:logLik]{logLik()}} and \code{\link[stats:AIC]{AIC()}} use the
penalized log-likelihood. With the default \code{alpha}, the inferential
procedures based on penalized likelihood are asymptotically
equivalent to the ones that use the unpenalized likelihood when
\code{p/n} is vanishing asymptotically.

For high-dimensionality corrected estimates, standard errors and z
statistics, use the \code{\link[=summary.mdyplFit]{summary}} method for
\code{\link[=mdyplFit]{"mdyplFit"}} objects with \code{hd_correction = TRUE}.

\code{\link[=mdypl_fit]{mdypl_fit()}} is an alias to \code{\link[=mdyplFit]{mdyplFit()}}.
}
\examples{

data("lizards", package = "brglm2")
## Model formula shared by all the lizards fits below
liz_fm <- cbind(grahami, opalinus) ~ height + diameter + light + time
## ML fit = MDYPL fit with `alpha = 1`
liz_ml <- glm(liz_fm, family = binomial(), data = lizards,
              method = "mdyplFit", alpha = 1)
liz_ml0 <- glm(liz_fm, family = binomial(), data = lizards)

## liz_ml is the same fit as liz_ml0
summ_liz_ml <- summary(liz_ml)
summ_liz_ml0 <- summary(liz_ml0)
all.equal(coef(summ_liz_ml), coef(summ_liz_ml0))

## MDYPL fit with default `alpha` (see `?mdyplControl`); the first
## argument of `glm()` is the model formula `liz_fm`, not a fitted object
liz_fm <- cbind(grahami, opalinus) ~ height + diameter + light + time
liz_mdypl <- glm(liz_fm, family = binomial(), data = lizards,
                 method = "mdyplFit")

## Comparing outputs from ML and MDYPL, with and without
## high-dimensionality corrections.
summary(liz_mdypl)
summary(liz_mdypl, hd_correction = TRUE)
summ_liz_ml
summary(liz_ml, hd_correction = TRUE)
## Not much difference in fits here as this is a low dimensional
## problem with dimensionality constant
(liz_ml$rank - 1) / sum(weights(liz_ml))



## The case study in Section 8 of Sterzinger and
## Kosmidis (2024)
data("MultipleFeatures", package = "brglm2")

## Center the fou.* and kar.* features, using the training rows only
vars <- grep("fou|kar", names(MultipleFeatures), value = TRUE)
train_id <- which(MultipleFeatures$training)
MultipleFeatures[train_id, vars] <- scale(MultipleFeatures[train_id, vars], scale = FALSE)
## Compute the MDYPL fits
## `kappa` is the number of features over the number of training rows
kappa <- length(vars) / sum(MultipleFeatures$training)
## Full model: all `fou` and `kar` features
full_fm <- formula(paste("I(digit == 7) ~", paste(vars, collapse = " + ")))
## Nested model: `fou` features only
nest_vars <- grep("fou", vars, value = TRUE)
nest_fm <- formula(paste("I(digit == 7) ~", paste(nest_vars, collapse = " + ")))
full_m <- glm(full_fm, data = MultipleFeatures, family = binomial(),
              method = mdyplFit, alpha = 1 / (1 + kappa), subset = training)
nest_m <- update(full_m, nest_fm)

## With a naive penalized likelihood ratio test we get no evidence
## against the hypothesis that the model with only `fou` features
## is as good a description of `7` as the model with both `fou` and
## `kar` features.
plrtest(nest_m, full_m)

## With a high-dimensionality correction there is strong evidence
## against the model with only `fou` features
plrtest(nest_m, full_m, hd_correction = TRUE)


\donttest{
## A simulated data set as in Rigon & Aliverti (2023, Section 4.3)

set.seed(123)
## `n` observations on `p` covariates with independent standard
## normal entries
n <- 1000
p <- 500
gamma <- sqrt(5)
X <- matrix(rnorm(n * p, 0, 1), nrow = n, ncol = p)
## Five equally sized groups of coefficients, rescaled so that the
## coefficient vector has Euclidean norm `gamma`
betas0 <- rep(c(-1, -1/2, 0, 2, 3), each = p / 5)
betas <- gamma * betas0 / sqrt(sum(betas0^2))
## Binary responses from the logistic regression model with no intercept
probs <- plogis(drop(X \%*\% betas))
y <- rbinom(n, 1, probs)
fit_mdypl <- glm(y ~ -1 + X, family = binomial(), method = "mdyplFit")

## The default value of `alpha` is `n / (n + p)` here
identical(n / (n + p), fit_mdypl$alpha)

## Aggregate bias of MDYPL and rescaled MDYPL estimators
## (mean difference between the estimates and the true coefficients)
ag_bias <- function(estimates, beta) mean(estimates - beta)
ag_bias(coef(summary(fit_mdypl))[, "Estimate"], betas)
ag_bias(coef(summary(fit_mdypl, hd_correction = TRUE))[, "Estimate"], betas)

}
}
\references{
Sterzinger P, Kosmidis I (2024). Diaconis-Ylvisaker prior
penalized likelihood for \eqn{p/n \to \kappa \in (0,1)} logistic
regression. \emph{arXiv}:2311.07419v2, \url{https://arxiv.org/abs/2311.07419}.

Rigon T, Aliverti E (2023). Conjugate priors and bias reduction for
logistic regression models. \emph{Statistics & Probability Letters},
\strong{202}, 109901. \doi{10.1016/j.spl.2023.109901}.
}
\seealso{
\code{\link[=mdyplControl]{mdyplControl()}}, \code{\link[=summary.mdyplFit]{summary.mdyplFit()}}, \code{\link[=plrtest.mdyplFit]{plrtest.mdyplFit()}}, \code{\link[=glm]{glm()}}
}
\author{
Ioannis Kosmidis \verb{[aut, cre]} \email{ioannis.kosmidis@warwick.ac.uk}
}
