Calculated distribution of a query from a prior or posterior distribution of parameters
Usage
query_distribution(
model,
queries = NULL,
given = NULL,
using = "parameters",
parameters = NULL,
n_draws = 4000,
join_by = "|",
case_level = FALSE,
query = NULL
)
Arguments
- model
A `causal_model`. A model object generated by `make_model`.
- queries
A vector of strings or list of strings specifying queries on potential outcomes such as "Y[X=1] - Y[X=0]". Queries can also indicate conditioning sets by placing second queries after a colon: "Y[X=1] - Y[X=0] :|: X == 1 & Y == 1". Note a ':|:' is used rather than the traditional conditioning marker '|' to avoid confusion with logical operators.
- given
A character vector specifying given conditions for each query. A 'given' is a quoted expression that evaluates to a logical statement. `given` allows the query to be conditioned on either observed or counterfactual distributions. A value of TRUE is interpreted as no conditioning. A given statement can alternatively be provided after the ':|:' marker in the query statement.
- using
A character. Whether to use priors, posteriors or parameters
- parameters
A vector or list of vectors of real numbers in [0,1]. A true parameter vector to be used instead of parameters attached to the model in case `using` specifies `parameters`.
- n_draws
An integer. Number of draws.
- join_by
A character. The logical operator joining expanded types when `query` contains a wildcard (`.`). Can take values `"&"` (logical AND) or `"|"` (logical OR). When the query contains a wildcard (`.`) and `join_by` is not specified, it defaults to `"|"`, otherwise it defaults to `NULL`.
- case_level
Logical. If TRUE estimates the probability of the query for a case.
- query
alias for queries
Value
A data frame where columns contain draws from the distribution
of the potential outcomes specified in query
Examples
model <- make_model("X -> Y") |>
set_parameters(c(.5, .5, .1, .2, .3, .4))
# \donttest{
# simple queries
query_distribution(model, query = "(Y[X=1] > Y[X=0])", using = "priors") |>
head()
#> (Y[X=1] > Y[X=0])
#> 1 0.001795634
#> 2 0.058576170
#> 3 0.278420521
#> 4 0.406568773
#> 5 0.271427323
#> 6 0.067091005
# multiple queries
query_distribution(model,
query = list(PE = "(Y[X=1] > Y[X=0])", NE = "(Y[X=1] < Y[X=0])"),
using = "priors")|>
head()
#> PE NE
#> 1 0.3690942 0.36104595
#> 2 0.1351634 0.05122247
#> 3 0.5428152 0.13362011
#> 4 0.7568997 0.23303971
#> 5 0.1808605 0.31686865
#> 6 0.1066036 0.60492467
# multiple queries and givens, with ':|:' to identify conditioning distributions
query_distribution(model,
query = list(POC = "(Y[X=1] > Y[X=0]) :|: X == 1 & Y == 1",
Q = "(Y[X=1] < Y[X=0]) :|: (Y[X=1] <= Y[X=0])"),
using = "priors")|>
head()
#> POC Q
#> 1 0.3172258 0.07644372
#> 2 0.2999887 0.24867242
#> 3 0.6330284 0.48126199
#> 4 0.8912561 0.87703807
#> 5 0.1345271 0.26997655
#> 6 0.7452636 0.33658569
# multiple queries and givens, using 'given' argument
query_distribution(model,
query = list("(Y[X=1] > Y[X=0])", "(Y[X=1] < Y[X=0])"),
given = list("Y==1", "(Y[X=1] <= Y[X=0])"),
using = "priors")|>
head()
#> (Y[X=1] > Y[X=0]) :|: Y==1 (Y[X=1] < Y[X=0]) :|: (Y[X=1] <= Y[X=0])
#> 1 0.128606584 0.34041433
#> 2 0.087729648 0.68205949
#> 3 0.003363103 0.61753558
#> 4 0.014524967 0.57066710
#> 5 0.536176041 0.02103535
#> 6 0.291294975 0.16430601
# linear queries
query_distribution(model, query = "(Y[X=1] - Y[X=0])")
#> (Y[X=1] - Y[X=0])
#> 1 0.1
# Linear query conditional on potential outcomes
query_distribution(model, query = "(Y[X=1] - Y[X=0]) :|: Y[X=1]==0")
#> (Y[X=1] - Y[X=0]) :|: Y[X=1]==0
#> 1 -0.6666667
# Use join_by to amend query interpretation
query_distribution(model, query = "(Y[X=.] == 1)", join_by = "&")
#> Generated expanded expression:
#> (Y[X=0] == 1 | Y[X=1] == 1)
#> (Y[X=.] == 1)
#> 1 0.9
# Probability of causation query
query_distribution(model,
query = "(Y[X=1] > Y[X=0])",
given = "X==1 & Y==1",
using = "priors") |> head()
#> (Y[X=1] > Y[X=0]) :|: X==1 & Y==1
#> 1 0.4896671
#> 2 0.9021646
#> 3 0.6101905
#> 4 0.4000978
#> 5 0.1055981
#> 6 0.3423999
# Case level probability of causation query
query_distribution(model,
query = "(Y[X=1] > Y[X=0])",
given = "X==1 & Y==1",
case_level = TRUE,
using = "priors")
#> (Y[X=1] > Y[X=0]) :|: X==1 & Y==1
#> 1 0.4905204
# Query posterior
update_model(model, make_data(model, n = 3)) |>
query_distribution(query = "(Y[X=1] - Y[X=0])", using = "posteriors") |>
head()
#>
#> SAMPLING FOR MODEL 'simplexes' NOW (CHAIN 1).
#> Chain 1:
#> Chain 1: Gradient evaluation took 2.1e-05 seconds
#> Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.21 seconds.
#> Chain 1: Adjust your expectations accordingly!
#> Chain 1:
#> Chain 1:
#> Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
#> Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
#> Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
#> Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
#> Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
#> Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
#> Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
#> Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
#> Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
#> Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
#> Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
#> Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
#> Chain 1:
#> Chain 1: Elapsed Time: 0.131 seconds (Warm-up)
#> Chain 1: 0.135 seconds (Sampling)
#> Chain 1: 0.266 seconds (Total)
#> Chain 1:
#>
#> SAMPLING FOR MODEL 'simplexes' NOW (CHAIN 2).
#> Chain 2:
#> Chain 2: Gradient evaluation took 1.7e-05 seconds
#> Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.17 seconds.
#> Chain 2: Adjust your expectations accordingly!
#> Chain 2:
#> Chain 2:
#> Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
#> Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
#> Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
#> Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
#> Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
#> Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
#> Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
#> Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
#> Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
#> Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
#> Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
#> Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
#> Chain 2:
#> Chain 2: Elapsed Time: 0.135 seconds (Warm-up)
#> Chain 2: 0.145 seconds (Sampling)
#> Chain 2: 0.28 seconds (Total)
#> Chain 2:
#>
#> SAMPLING FOR MODEL 'simplexes' NOW (CHAIN 3).
#> Chain 3:
#> Chain 3: Gradient evaluation took 1.6e-05 seconds
#> Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.16 seconds.
#> Chain 3: Adjust your expectations accordingly!
#> Chain 3:
#> Chain 3:
#> Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
#> Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
#> Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
#> Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
#> Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
#> Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
#> Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
#> Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
#> Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
#> Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
#> Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
#> Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
#> Chain 3:
#> Chain 3: Elapsed Time: 0.139 seconds (Warm-up)
#> Chain 3: 0.162 seconds (Sampling)
#> Chain 3: 0.301 seconds (Total)
#> Chain 3:
#>
#> SAMPLING FOR MODEL 'simplexes' NOW (CHAIN 4).
#> Chain 4:
#> Chain 4: Gradient evaluation took 1.7e-05 seconds
#> Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.17 seconds.
#> Chain 4: Adjust your expectations accordingly!
#> Chain 4:
#> Chain 4:
#> Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
#> Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
#> Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
#> Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
#> Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
#> Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
#> Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
#> Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
#> Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
#> Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
#> Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
#> Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
#> Chain 4:
#> Chain 4: Elapsed Time: 0.14 seconds (Warm-up)
#> Chain 4: 0.14 seconds (Sampling)
#> Chain 4: 0.28 seconds (Total)
#> Chain 4:
#> (Y[X=1] - Y[X=0])
#> 1 -0.2235133318
#> 2 0.0002778984
#> 3 0.2453931230
#> 4 -0.1057622221
#> 5 0.2267328204
#> 6 0.4706564808
# Case level queries provide the inference for a case, which is a scalar
# The case level query *updates* on the given information
# For instance, here we have a model for which we are quite sure that X
# causes Y but we do not know whether it works through two positive effects
# or two negative effects. Thus we do not know if M=0 would suggest an
# effect or no effect
set.seed(1)
model <-
make_model("X -> M -> Y") |>
update_model(data.frame(X = rep(0:1, 8), Y = rep(0:1, 8)), iter = 10000)
#>
#> SAMPLING FOR MODEL 'simplexes' NOW (CHAIN 1).
#> Chain 1:
#> Chain 1: Gradient evaluation took 2.9e-05 seconds
#> Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.29 seconds.
#> Chain 1: Adjust your expectations accordingly!
#> Chain 1:
#> Chain 1:
#> Chain 1: Iteration: 1 / 10000 [ 0%] (Warmup)
#> Chain 1: Iteration: 1000 / 10000 [ 10%] (Warmup)
#> Chain 1: Iteration: 2000 / 10000 [ 20%] (Warmup)
#> Chain 1: Iteration: 3000 / 10000 [ 30%] (Warmup)
#> Chain 1: Iteration: 4000 / 10000 [ 40%] (Warmup)
#> Chain 1: Iteration: 5000 / 10000 [ 50%] (Warmup)
#> Chain 1: Iteration: 5001 / 10000 [ 50%] (Sampling)
#> Chain 1: Iteration: 6000 / 10000 [ 60%] (Sampling)
#> Chain 1: Iteration: 7000 / 10000 [ 70%] (Sampling)
#> Chain 1: Iteration: 8000 / 10000 [ 80%] (Sampling)
#> Chain 1: Iteration: 9000 / 10000 [ 90%] (Sampling)
#> Chain 1: Iteration: 10000 / 10000 [100%] (Sampling)
#> Chain 1:
#> Chain 1: Elapsed Time: 1.429 seconds (Warm-up)
#> Chain 1: 1.632 seconds (Sampling)
#> Chain 1: 3.061 seconds (Total)
#> Chain 1:
#>
#> SAMPLING FOR MODEL 'simplexes' NOW (CHAIN 2).
#> Chain 2:
#> Chain 2: Gradient evaluation took 2.7e-05 seconds
#> Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.
#> Chain 2: Adjust your expectations accordingly!
#> Chain 2:
#> Chain 2:
#> Chain 2: Iteration: 1 / 10000 [ 0%] (Warmup)
#> Chain 2: Iteration: 1000 / 10000 [ 10%] (Warmup)
#> Chain 2: Iteration: 2000 / 10000 [ 20%] (Warmup)
#> Chain 2: Iteration: 3000 / 10000 [ 30%] (Warmup)
#> Chain 2: Iteration: 4000 / 10000 [ 40%] (Warmup)
#> Chain 2: Iteration: 5000 / 10000 [ 50%] (Warmup)
#> Chain 2: Iteration: 5001 / 10000 [ 50%] (Sampling)
#> Chain 2: Iteration: 6000 / 10000 [ 60%] (Sampling)
#> Chain 2: Iteration: 7000 / 10000 [ 70%] (Sampling)
#> Chain 2: Iteration: 8000 / 10000 [ 80%] (Sampling)
#> Chain 2: Iteration: 9000 / 10000 [ 90%] (Sampling)
#> Chain 2: Iteration: 10000 / 10000 [100%] (Sampling)
#> Chain 2:
#> Chain 2: Elapsed Time: 1.448 seconds (Warm-up)
#> Chain 2: 1.491 seconds (Sampling)
#> Chain 2: 2.939 seconds (Total)
#> Chain 2:
#>
#> SAMPLING FOR MODEL 'simplexes' NOW (CHAIN 3).
#> Chain 3:
#> Chain 3: Gradient evaluation took 2.7e-05 seconds
#> Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.
#> Chain 3: Adjust your expectations accordingly!
#> Chain 3:
#> Chain 3:
#> Chain 3: Iteration: 1 / 10000 [ 0%] (Warmup)
#> Chain 3: Iteration: 1000 / 10000 [ 10%] (Warmup)
#> Chain 3: Iteration: 2000 / 10000 [ 20%] (Warmup)
#> Chain 3: Iteration: 3000 / 10000 [ 30%] (Warmup)
#> Chain 3: Iteration: 4000 / 10000 [ 40%] (Warmup)
#> Chain 3: Iteration: 5000 / 10000 [ 50%] (Warmup)
#> Chain 3: Iteration: 5001 / 10000 [ 50%] (Sampling)
#> Chain 3: Iteration: 6000 / 10000 [ 60%] (Sampling)
#> Chain 3: Iteration: 7000 / 10000 [ 70%] (Sampling)
#> Chain 3: Iteration: 8000 / 10000 [ 80%] (Sampling)
#> Chain 3: Iteration: 9000 / 10000 [ 90%] (Sampling)
#> Chain 3: Iteration: 10000 / 10000 [100%] (Sampling)
#> Chain 3:
#> Chain 3: Elapsed Time: 1.511 seconds (Warm-up)
#> Chain 3: 1.473 seconds (Sampling)
#> Chain 3: 2.984 seconds (Total)
#> Chain 3:
#>
#> SAMPLING FOR MODEL 'simplexes' NOW (CHAIN 4).
#> Chain 4:
#> Chain 4: Gradient evaluation took 2.4e-05 seconds
#> Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.24 seconds.
#> Chain 4: Adjust your expectations accordingly!
#> Chain 4:
#> Chain 4:
#> Chain 4: Iteration: 1 / 10000 [ 0%] (Warmup)
#> Chain 4: Iteration: 1000 / 10000 [ 10%] (Warmup)
#> Chain 4: Iteration: 2000 / 10000 [ 20%] (Warmup)
#> Chain 4: Iteration: 3000 / 10000 [ 30%] (Warmup)
#> Chain 4: Iteration: 4000 / 10000 [ 40%] (Warmup)
#> Chain 4: Iteration: 5000 / 10000 [ 50%] (Warmup)
#> Chain 4: Iteration: 5001 / 10000 [ 50%] (Sampling)
#> Chain 4: Iteration: 6000 / 10000 [ 60%] (Sampling)
#> Chain 4: Iteration: 7000 / 10000 [ 70%] (Sampling)
#> Chain 4: Iteration: 8000 / 10000 [ 80%] (Sampling)
#> Chain 4: Iteration: 9000 / 10000 [ 90%] (Sampling)
#> Chain 4: Iteration: 10000 / 10000 [100%] (Sampling)
#> Chain 4:
#> Chain 4: Elapsed Time: 1.501 seconds (Warm-up)
#> Chain 4: 2.218 seconds (Sampling)
#> Chain 4: 3.719 seconds (Total)
#> Chain 4:
Q <- "Y[X=1] > Y[X=0]"
G <- "X==1 & Y==1 & M==1"
QG <- "(Y[X=1] > Y[X=0]) & (X==1 & Y==1 & M==1)"
# In this case these are very different:
query_distribution(model, Q, given = G, using = "posteriors")[[1]] |> mean()
#> [1] 0.4238768
query_distribution(model, Q, given = G, using = "posteriors",
case_level = TRUE)
#> Y[X=1] > Y[X=0] :|: X==1 & Y==1 & M==1
#> 1 0.6715179
# These are equivalent:
# 1. Case level query via function
query_distribution(model, Q, given = G,
using = "posteriors", case_level = TRUE)
#> Y[X=1] > Y[X=0] :|: X==1 & Y==1 & M==1
#> 1 0.6715179
# 2. Case level query by hand using Bayes' rule
query_distribution(
model,
list(QG = QG, G = G),
using = "posteriors") |>
dplyr::summarize(mean(QG)/mean(G))
#> mean(QG)/mean(G)
#> 1 0.6715179
# }