Skip to contents

Calculated distribution of a query from a prior or posterior distribution of parameters

Usage

query_distribution(
  model,
  queries,
  given = NULL,
  using = "parameters",
  parameters = NULL,
  n_draws = 4000,
  join_by = "|",
  case_level = FALSE,
  query = NULL
)

Arguments

model

A causal_model. A model object generated by make_model.

queries

A character vector or list of character vectors specifying queries on potential outcomes such as "Y[X=1] - Y[X=0]"

given

A character vector specifying givens for each query. A given is a quoted expression that evaluates to logical statement. given allows the query to be conditioned on *observational* distribution. A value of TRUE is interpreted as no conditioning.

using

A character. Whether to use priors, posteriors or parameters

parameters

A vector or list of vectors of real numbers in [0,1]. A true parameter vector to be used instead of parameters attached to the model in case using specifies parameters

n_draws

An integer. Number of draws.rm

join_by

A character. The logical operator joining expanded types when query contains wildcard (.). Can take values "&" (logical AND) or "|" (logical OR). When restriction contains wildcard (.) and join_by is not specified, it defaults to "|", otherwise it defaults to NULL.

case_level

Logical. If TRUE estimates the probability of the query for a case.

query

alias for queries

Value

A data frame where columns contain draws from the distribution of the potential outcomes specified in query

Examples

model <- make_model("X -> Y") |>
         set_parameters(c(.5, .5, .1, .2, .3, .4))
 # \donttest{
 # simple  queries
 query_distribution(model, query = "(Y[X=1] > Y[X=0])",
                    using = "priors") |>
   head()
#>   (Y[X=1] > Y[X=0])
#> 1       0.001795634
#> 2       0.058576170
#> 3       0.278420521
#> 4       0.406568773
#> 5       0.271427323
#> 6       0.067091005

 # multiple  queries
 query_distribution(model,
     query = list("(Y[X=1] > Y[X=0])",
                  "(Y[X=1] < Y[X=0])"),
     using = "priors")|>
   head()
#>   (Y[X=1] > Y[X=0]) (Y[X=1] < Y[X=0])
#> 1         0.3690942        0.36104595
#> 2         0.1351634        0.05122247
#> 3         0.5428152        0.13362011
#> 4         0.7568997        0.23303971
#> 5         0.1808605        0.31686865
#> 6         0.1066036        0.60492467

 # multiple queries and givens
 query_distribution(model,
   query = list("(Y[X=1] > Y[X=0])", "(Y[X=1] < Y[X=0])"),
   given = list("Y==1", "(Y[X=1] <= Y[X=0])"),
   using = "priors")|>
   head()
#>   (Y[X=1] > Y[X=0]) | Y==1 (Y[X=1] < Y[X=0]) | (Y[X=1] <= Y[X=0])
#> 1               0.14383959                             0.07644372
#> 2               0.27842094                             0.24867242
#> 3               0.24598243                             0.48126199
#> 4               0.85117742                             0.87703807
#> 5               0.05181511                             0.26997655
#> 6               0.09507225                             0.33658569

 # linear queries
 query_distribution(model, query = "(Y[X=1] - Y[X=0])")
#>   (Y[X=1] - Y[X=0])
#> 1               0.1

 # queries conditional on observables
 query_distribution(model, query = "(Y[X=1] > Y[X=0])",
                    given = "X==1 & Y ==1")
#>   (Y[X=1] > Y[X=0])
#> 1         0.4285714

 # Linear query conditional on potential outcomes
 query_distribution(model, query = "(Y[X=1] - Y[X=0])",
                    given = "Y[X=1]==0")
#>   (Y[X=1] - Y[X=0])
#> 1        -0.6666667

 # Use join_by to amend query interpretation
 query_distribution(model, query = "(Y[X=.] == 1)", join_by = "&")
#> Generated expanded expression:
#> (Y[X=0] == 1 | Y[X=1] == 1)
#>   (Y[X=.] == 1)
#> 1           0.9

 # Probability of causation query
 query_distribution(model,
    query = "(Y[X=1] > Y[X=0])",
    given = "X==1 & Y==1",
    using = "priors")  |> head()
#>   (Y[X=1] > Y[X=0])
#> 1        0.31688540
#> 2        0.89969187
#> 3        0.21084039
#> 4        0.09281103
#> 5        0.69779748
#> 6        0.86720481

 # Case level probability of causation query
 query_distribution(model,
    query = "(Y[X=1] > Y[X=0])",
    given = "X==1 & Y==1",
    case_level = TRUE,
    using = "priors")
#>   (Y[X=1] > Y[X=0])
#> 1         0.5032177

 # Query posterior
 update_model(model, make_data(model, n = 3)) |>
 query_distribution(query = "(Y[X=1] - Y[X=0])", using = "posteriors") |>
 head()
#> 
#> SAMPLING FOR MODEL 'simplexes' NOW (CHAIN 1).
#> Chain 1: 
#> Chain 1: Gradient evaluation took 2.2e-05 seconds
#> Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.22 seconds.
#> Chain 1: Adjust your expectations accordingly!
#> Chain 1: 
#> Chain 1: 
#> Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
#> Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
#> Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
#> Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
#> Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
#> Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
#> Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
#> Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
#> Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
#> Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
#> Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
#> Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
#> Chain 1: 
#> Chain 1:  Elapsed Time: 0.152 seconds (Warm-up)
#> Chain 1:                0.122 seconds (Sampling)
#> Chain 1:                0.274 seconds (Total)
#> Chain 1: 
#> 
#> SAMPLING FOR MODEL 'simplexes' NOW (CHAIN 2).
#> Chain 2: 
#> Chain 2: Gradient evaluation took 1.8e-05 seconds
#> Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.18 seconds.
#> Chain 2: Adjust your expectations accordingly!
#> Chain 2: 
#> Chain 2: 
#> Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
#> Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
#> Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
#> Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
#> Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
#> Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
#> Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
#> Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
#> Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
#> Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
#> Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
#> Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
#> Chain 2: 
#> Chain 2:  Elapsed Time: 0.13 seconds (Warm-up)
#> Chain 2:                0.132 seconds (Sampling)
#> Chain 2:                0.262 seconds (Total)
#> Chain 2: 
#> 
#> SAMPLING FOR MODEL 'simplexes' NOW (CHAIN 3).
#> Chain 3: 
#> Chain 3: Gradient evaluation took 1.5e-05 seconds
#> Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.15 seconds.
#> Chain 3: Adjust your expectations accordingly!
#> Chain 3: 
#> Chain 3: 
#> Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
#> Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
#> Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
#> Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
#> Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
#> Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
#> Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
#> Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
#> Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
#> Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
#> Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
#> Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
#> Chain 3: 
#> Chain 3:  Elapsed Time: 0.137 seconds (Warm-up)
#> Chain 3:                0.148 seconds (Sampling)
#> Chain 3:                0.285 seconds (Total)
#> Chain 3: 
#> 
#> SAMPLING FOR MODEL 'simplexes' NOW (CHAIN 4).
#> Chain 4: 
#> Chain 4: Gradient evaluation took 1.6e-05 seconds
#> Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.16 seconds.
#> Chain 4: Adjust your expectations accordingly!
#> Chain 4: 
#> Chain 4: 
#> Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
#> Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
#> Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
#> Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
#> Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
#> Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
#> Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
#> Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
#> Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
#> Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
#> Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
#> Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
#> Chain 4: 
#> Chain 4:  Elapsed Time: 0.135 seconds (Warm-up)
#> Chain 4:                0.125 seconds (Sampling)
#> Chain 4:                0.26 seconds (Total)
#> Chain 4: 
#>   (Y[X=1] - Y[X=0])
#> 1       -0.22142760
#> 2       -0.17351364
#> 3        0.31910477
#> 4        0.04481814
#> 5       -0.09084778
#> 6        0.40488316

 # Case level queries provide the inference for a case, which is a scalar
 # The case level query *updates* on the given information
 # For instance, here we have a model for which we are quite sure that X
 # causes Y but we do not know whether it works through two positive effects
 # or two negative effects. Thus we do not know if M=0 would suggest an
 # effect or no effect

 set.seed(1)
 model <-
   make_model("X -> M -> Y") |>
   update_model(data.frame(X = rep(0:1, 8), Y = rep(0:1, 8)), iter = 10000)
#> 
#> SAMPLING FOR MODEL 'simplexes' NOW (CHAIN 1).
#> Chain 1: 
#> Chain 1: Gradient evaluation took 2.7e-05 seconds
#> Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.
#> Chain 1: Adjust your expectations accordingly!
#> Chain 1: 
#> Chain 1: 
#> Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)
#> Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)
#> Chain 1: Iteration: 2000 / 10000 [ 20%]  (Warmup)
#> Chain 1: Iteration: 3000 / 10000 [ 30%]  (Warmup)
#> Chain 1: Iteration: 4000 / 10000 [ 40%]  (Warmup)
#> Chain 1: Iteration: 5000 / 10000 [ 50%]  (Warmup)
#> Chain 1: Iteration: 5001 / 10000 [ 50%]  (Sampling)
#> Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
#> Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
#> Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
#> Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
#> Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
#> Chain 1: 
#> Chain 1:  Elapsed Time: 1.392 seconds (Warm-up)
#> Chain 1:                1.592 seconds (Sampling)
#> Chain 1:                2.984 seconds (Total)
#> Chain 1: 
#> 
#> SAMPLING FOR MODEL 'simplexes' NOW (CHAIN 2).
#> Chain 2: 
#> Chain 2: Gradient evaluation took 2.5e-05 seconds
#> Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.25 seconds.
#> Chain 2: Adjust your expectations accordingly!
#> Chain 2: 
#> Chain 2: 
#> Chain 2: Iteration:    1 / 10000 [  0%]  (Warmup)
#> Chain 2: Iteration: 1000 / 10000 [ 10%]  (Warmup)
#> Chain 2: Iteration: 2000 / 10000 [ 20%]  (Warmup)
#> Chain 2: Iteration: 3000 / 10000 [ 30%]  (Warmup)
#> Chain 2: Iteration: 4000 / 10000 [ 40%]  (Warmup)
#> Chain 2: Iteration: 5000 / 10000 [ 50%]  (Warmup)
#> Chain 2: Iteration: 5001 / 10000 [ 50%]  (Sampling)
#> Chain 2: Iteration: 6000 / 10000 [ 60%]  (Sampling)
#> Chain 2: Iteration: 7000 / 10000 [ 70%]  (Sampling)
#> Chain 2: Iteration: 8000 / 10000 [ 80%]  (Sampling)
#> Chain 2: Iteration: 9000 / 10000 [ 90%]  (Sampling)
#> Chain 2: Iteration: 10000 / 10000 [100%]  (Sampling)
#> Chain 2: 
#> Chain 2:  Elapsed Time: 1.417 seconds (Warm-up)
#> Chain 2:                1.461 seconds (Sampling)
#> Chain 2:                2.878 seconds (Total)
#> Chain 2: 
#> 
#> SAMPLING FOR MODEL 'simplexes' NOW (CHAIN 3).
#> Chain 3: 
#> Chain 3: Gradient evaluation took 2.5e-05 seconds
#> Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.25 seconds.
#> Chain 3: Adjust your expectations accordingly!
#> Chain 3: 
#> Chain 3: 
#> Chain 3: Iteration:    1 / 10000 [  0%]  (Warmup)
#> Chain 3: Iteration: 1000 / 10000 [ 10%]  (Warmup)
#> Chain 3: Iteration: 2000 / 10000 [ 20%]  (Warmup)
#> Chain 3: Iteration: 3000 / 10000 [ 30%]  (Warmup)
#> Chain 3: Iteration: 4000 / 10000 [ 40%]  (Warmup)
#> Chain 3: Iteration: 5000 / 10000 [ 50%]  (Warmup)
#> Chain 3: Iteration: 5001 / 10000 [ 50%]  (Sampling)
#> Chain 3: Iteration: 6000 / 10000 [ 60%]  (Sampling)
#> Chain 3: Iteration: 7000 / 10000 [ 70%]  (Sampling)
#> Chain 3: Iteration: 8000 / 10000 [ 80%]  (Sampling)
#> Chain 3: Iteration: 9000 / 10000 [ 90%]  (Sampling)
#> Chain 3: Iteration: 10000 / 10000 [100%]  (Sampling)
#> Chain 3: 
#> Chain 3:  Elapsed Time: 1.472 seconds (Warm-up)
#> Chain 3:                1.444 seconds (Sampling)
#> Chain 3:                2.916 seconds (Total)
#> Chain 3: 
#> 
#> SAMPLING FOR MODEL 'simplexes' NOW (CHAIN 4).
#> Chain 4: 
#> Chain 4: Gradient evaluation took 2.7e-05 seconds
#> Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.
#> Chain 4: Adjust your expectations accordingly!
#> Chain 4: 
#> Chain 4: 
#> Chain 4: Iteration:    1 / 10000 [  0%]  (Warmup)
#> Chain 4: Iteration: 1000 / 10000 [ 10%]  (Warmup)
#> Chain 4: Iteration: 2000 / 10000 [ 20%]  (Warmup)
#> Chain 4: Iteration: 3000 / 10000 [ 30%]  (Warmup)
#> Chain 4: Iteration: 4000 / 10000 [ 40%]  (Warmup)
#> Chain 4: Iteration: 5000 / 10000 [ 50%]  (Warmup)
#> Chain 4: Iteration: 5001 / 10000 [ 50%]  (Sampling)
#> Chain 4: Iteration: 6000 / 10000 [ 60%]  (Sampling)
#> Chain 4: Iteration: 7000 / 10000 [ 70%]  (Sampling)
#> Chain 4: Iteration: 8000 / 10000 [ 80%]  (Sampling)
#> Chain 4: Iteration: 9000 / 10000 [ 90%]  (Sampling)
#> Chain 4: Iteration: 10000 / 10000 [100%]  (Sampling)
#> Chain 4: 
#> Chain 4:  Elapsed Time: 1.461 seconds (Warm-up)
#> Chain 4:                2.077 seconds (Sampling)
#> Chain 4:                3.538 seconds (Total)
#> Chain 4: 

 Q <- "Y[X=1] > Y[X=0]"
 G <- "X==1 & Y==1 & M==1"
 QG <- "(Y[X=1] > Y[X=0]) & (X==1 & Y==1 & M==1)"

 # In this case these are very different:
 query_distribution(model, Q, given = G, using = "posteriors")[[1]] |> mean()
#> [1] 0.4238768
 query_distribution(model, Q, given = G, using = "posteriors",
   case_level = TRUE)
#>   Y[X=1] > Y[X=0]
#> 1       0.6715179

 # These are equivalent:
 # 1. Case level query via function
 query_distribution(model, Q, given = G,
    using = "posteriors", case_level = TRUE)
#>   Y[X=1] > Y[X=0]
#> 1       0.6715179

 # 2. Case level query by hand using Bayes' rule
 query_distribution(
     model,
     list(QG = QG, G = G),
     using = "posteriors") |>
    dplyr::summarize(mean(QG)/mean(G))
#>   mean(QG)/mean(G)
#> 1        0.6715179

# }