Skip to content

Latest commit

 

History

History
341 lines (275 loc) · 10.2 KB

README.org

File metadata and controls

341 lines (275 loc) · 10.2 KB

Latent Variable Models (lava)

Latent Variable Models: lava <img src=”” alt=”” />

A general implementation of Structural Equation Models with latent variables (MLE, 2SLS, and composite likelihood estimators) with both continuous, censored, and ordinal outcomes (Holst and Budtz-Joergensen (2013) <10.1007/s00180-012-0344-y>). Mixture latent variable models and non-linear latent variable models (Holst and Budtz-Joergensen (2020) <10.1093/biostatistics/kxy082>). The package also provides methods for graph exploration (d-separation, back-door criterion), simulation of general non-linear latent variable models, and estimation of influence functions for a broad range of statistical models.

Installation

install.packages("lava", dependencies=TRUE)
library("lava")
demo("lava")

For graphical capabilities the Rgraphviz package is needed (first install the BiocManager package)

# install.packages("BiocManager")
BiocManager::install("Rgraphviz")

or the igraph or visNetwork packages

install.packages("igraph")
install.packages("visNetwork")

The development version of lava may also be installed directly from github:

# install.packages("remotes")
remotes::install_github("kkholst/lava")

Citation

To cite that lava package please use one of the following references

Klaus K. Holst and Esben Budtz-Joergensen (2013). Linear Latent Variable Models: The lava-package. Computational Statistics 28 (4), pp 1385-1453. http://dx.doi.org/10.1007/s00180-012-0344-y

@article{lava,
  title = {Linear Latent Variable Models: The lava-package},
  author = {Klaus Kähler Holst and Esben Budtz-Jørgensen},
  year = {2013},
  volume = {28},
  number = {4},
  pages = {1385-1452},
  journal = {Computational Statistics},
  doi = {10.1007/s00180-012-0344-y}
}

Klaus K. Holst and Esben Budtz-Jørgensen (2020). A two-stage estimation procedure for non-linear structural equation models. Biostatistics 21 (4), pp 676-691. http://dx.doi.org/10.1093/biostatistics/kxy082

@article{lava_nlin,
  title = {A two-stage estimation procedure for non-linear structural equation models},
  author = {Klaus Kähler Holst and Esben Budtz-Jørgensen},
  journal = {Biostatistics},
  year = {2020},
  volume = {21},
  number = {4},
  pages = {676-691},
  doi = {10.1093/biostatistics/kxy082},
}

Examples

library(lava)

Structural Equation Model

Specify structural equation models with two factors

m <- lvm()
regression(m) <- y1 + y2 + y3 ~ eta1
regression(m) <- z1 + z2 + z3 ~ eta2
latent(m) <- ~ eta1 + eta2
regression(m) <- eta2 ~ eta1 + x
regression(m) <- eta1 ~ x

labels(m) <- c(eta1=expression(eta[1]), eta2=expression(eta[2]))
plot(m)

Simulation

d <- sim(m, 100, seed=1)

Estimation

e <- estimate(m, d)
e
                    Estimate Std. Error  Z-value   P-value
Measurements:
   y2~eta1           0.95462    0.08083 11.80993    <1e-12
   y3~eta1           0.98476    0.08922 11.03722    <1e-12
    z2~eta2          0.97038    0.05368 18.07714    <1e-12
    z3~eta2          0.95608    0.05643 16.94182    <1e-12
Regressions:
   eta1~x            1.24587    0.11486 10.84694    <1e-12
    eta2~eta1        0.95608    0.18008  5.30910 1.102e-07
    eta2~x           1.11495    0.25228  4.41951 9.893e-06
Intercepts:
   y2               -0.13896    0.12458 -1.11537    0.2647
   y3               -0.07661    0.13869 -0.55241    0.5807
   eta1              0.15801    0.12780  1.23644    0.2163
   z2               -0.00441    0.14858 -0.02969    0.9763
   z3               -0.15900    0.15731 -1.01076    0.3121
   eta2             -0.14143    0.18380 -0.76949    0.4416
Residual Variances:
   y1                0.69684    0.14858  4.69004
   y2                0.89804    0.16630  5.40026
   y3                1.22456    0.21182  5.78109
   eta1              0.93620    0.19623  4.77084
   z1                1.41422    0.26259  5.38570
   z2                0.87569    0.19463  4.49934
   z3                1.18155    0.22640  5.21883
   eta2              1.24430    0.28992  4.29195

Model assessment

Assessing goodness-of-fit, here the linearity between eta2 and eta1 (requires the gof package)

# install.packages("gof", repos="https://kkholst.github.io/r_repo/")
library("gof")
set.seed(1)
g <- cumres(e, eta2 ~ eta1)
plot(g)

Non-linear measurement error model

Simulate non-linear model

m <- lvm(y1 + y2 + y3 ~ u, u ~ x)
transform(m,u2 ~ u) <- function(x) x^2
regression(m) <- z~u2+u

d <- sim(m,200,p=c("z"=-1, "z~u2"=-0.5), seed=1)

Stage 1:

m1 <- lvm(c(y1[0:s], y2[0:s], y3[0:s]) ~ 1*u, u ~ x)
latent(m1) <- ~ u
(e1 <- estimate(m1, d))
                    Estimate Std. Error  Z-value  P-value
Regressions:
   u~x               1.06998    0.08208 13.03542   <1e-12
Intercepts:
   u                -0.08871    0.08753 -1.01344   0.3108
Residual Variances:
   y1                1.00054    0.07075 14.14214
   u                 1.19873    0.15503  7.73233

Stage 2

pp <- function(mu,var,data,...) cbind(u=mu[,"u"], u2=mu[,"u"]^2+var["u","u"])
(e <- measurement.error(e1, z~1+x, data=d, predictfun=pp))
            Estimate Std.Err    2.5%   97.5%   P-value
(Intercept)  -1.1181 0.13795 -1.3885 -0.8477 5.273e-16
x            -0.0537 0.13213 -0.3127  0.2053 6.844e-01
u             1.0039 0.11504  0.7785  1.2294 2.609e-18
u2           -0.4718 0.05213 -0.5740 -0.3697 1.410e-19
f <- function(p) p[1]+p["u"]*u+p["u2"]*u^2
u <- seq(-1, 1, length.out=100)
plot(e, f, data=data.frame(u))

Simulation

Studying the small-sample properties of a mediation analysis

m <- lvm(y~x, c~1)
regression(m) <- y+x ~ z
eventTime(m) <- t~min(y=1, c=0)
transform(m,S~t+status) <- function(x) survival::Surv(x[,1],x[,2])
plot(m)

Simulate from model and estimate indirect effects

onerun <- function(...) {
    d <- sim(m, 100)
    m0 <- lvm(S~x+z, x~z)
    e <- estimate(m0, d, estimator="glm")
    vec(summary(effects(e, S~z))$coef[,1:2])
}
val <- sim(onerun, 100)
summary(val, estimate=1:4, se=5:8, short=TRUE)
100 replications					Time: 4.859s

        Total.Estimate Direct.Estimate Indirect.Estimate S~x~z.Estimate
Mean           1.97895         0.98166           0.99729        0.99729
SD             0.20383         0.16523           0.18452        0.18452
SE             0.18149         0.17857           0.16476        0.16476
SE/SD          0.89041         1.08070           0.89290        0.89290

Min            1.56201         0.51205           0.58638        0.58638
2.5%           1.64021         0.66656           0.64491        0.64491
50%            1.94437         0.98567           0.98406        0.98406
97.5%          2.43580         1.30093           1.41001        1.41001
Max            2.51655         1.50968           1.45584        1.45584

Missing        0.00000         0.00000           0.00000        0.00000

Add additional simulations and visualize results

  val <- sim(val,500) ## Add 500 simulations
  plot(val, estimate=c("Total.Estimate", "Indirect.Estimate"),
	 true=c(2, 1), se=c("Total.Std.Err", "Indirect.Std.Err"),
	 scatter.plot=TRUE)