Make data
Usage
make_data(
model,
n = NULL,
parameters = NULL,
param_type = NULL,
nodes = NULL,
n_steps = NULL,
probs = NULL,
subsets = TRUE,
complete_data = NULL,
given = NULL,
verbose = TRUE,
...
)
Arguments
- model
A
causal_model
. A model object generated by make_model
.
- n
Non-negative integer. Number of observations. If not provided, it is inferred from the largest value in n_steps.
- parameters
A vector of real numbers in [0,1]. Values of parameters to specify (optional). By default, parameters are drawn from
model$parameters_df
.
- param_type
A character. String specifying type of parameters to make ("flat", "prior_mean", "posterior_mean", "prior_draw", "posterior_draw", "define"). With param_type set to
define
, use arguments to be passed to make_priors
; otherwise flat
sets equal probabilities on each nodal type in each parameter set; prior_mean
, prior_draw
, posterior_mean
, posterior_draw
take parameters as the means or as draws from the prior or posterior.
- nodes
A
list
. Which nodes are to be observed at each step. If NULL, all nodes are observed.
- n_steps
A
list
. Number of observations to be observed at each step.
- probs
A
list
. Observation probabilities at each step.
- subsets
A
list
. Strata within which observations are to be observed at each step. TRUE for all, otherwise an expression that evaluates to a logical condition.
- complete_data
A
data.frame
. Dataset with complete observations. Optional.
- given
A string specifying known values on nodes, e.g. "X==1 & Y==1"
- verbose
Logical. If TRUE prints step schedule.
- ...
Additional arguments that can be passed to
make_parameters
Details
Note that the default behavior does not take into account whether a node has already been observed when deciding whether to select it at a later step. One can, however, specifically request observation of nodes that have not been previously observed, for example by supplying a subset condition such as "is.na(X)".
Examples
# Simple draws
model <- make_model("X -> M -> Y")
make_data(model)
#> X M Y
#> 1 1 0 1
make_data(model, n = 3, nodes = c("X","Y"))
#> # A tibble: 1 × 5
#> node_names nodes n_steps probs subsets
#> <chr> <list> <dbl> <dbl> <lgl>
#> 1 X, Y <chr [2]> 3 1 TRUE
#> X M Y
#> 1 0 NA 0
#> 2 0 NA 1
#> 3 1 NA 0
make_data(model, n = 3, param_type = "prior_draw")
#> X M Y
#> 1 0 1 0
#> 2 0 1 0
#> 3 0 1 1
make_data(model, n = 10, param_type = "define", parameters = 0:9)
#> X M Y
#> 1 1 0 0
#> 2 1 0 0
#> 3 1 0 0
#> 4 1 1 0
#> 5 1 1 0
#> 6 1 1 0
#> 7 1 1 0
#> 8 1 1 1
#> 9 1 1 1
#> 10 1 1 1
# Data Strategies
# A strategy in which X, Y are observed for sure and M is observed
# with 50% probability for X=1, Y=0 cases
model <- make_model("X -> M -> Y")
make_data(
model,
n = 8,
nodes = list(c("X", "Y"), "M"),
probs = list(1, .5),
subsets = list(TRUE, "X==1 & Y==0"))
#> # A tibble: 2 × 5
#> node_names nodes n_steps probs subsets
#> <chr> <list> <lgl> <dbl> <chr>
#> 1 X, Y <chr [2]> NA 1 TRUE
#> 2 M <chr [1]> NA 0.5 X==1 & Y==0
#> X M Y
#> 1 0 NA 0
#> 2 0 NA 1
#> 3 0 NA 0
#> 4 0 NA 1
#> 5 1 NA 0
#> 6 1 0 0
#> 7 1 NA 1
#> 8 1 NA 1
# n not provided but inferred from largest n_step (not from sum of n_steps)
make_data(
model,
nodes = list(c("X", "Y"), "M"),
n_steps = list(5, 2))
#> # A tibble: 2 × 5
#> node_names nodes n_steps probs subsets
#> <chr> <list> <dbl> <dbl> <lgl>
#> 1 X, Y <chr [2]> 5 1 TRUE
#> 2 M <chr [1]> 2 1 TRUE
#> X M Y
#> 1 0 NA 0
#> 2 0 NA 0
#> 3 0 0 1
#> 4 0 1 1
#> 5 1 NA 1
# Wide then deep
make_data(
model,
n = 8,
nodes = list(c("X", "Y"), "M"),
subsets = list(TRUE, "!is.na(X) & !is.na(Y)"),
n_steps = list(6, 2))
#> # A tibble: 2 × 5
#> node_names nodes n_steps probs subsets
#> <chr> <list> <dbl> <dbl> <chr>
#> 1 X, Y <chr [2]> 6 1 TRUE
#> 2 M <chr [1]> 2 1 !is.na(X) & !is.na(Y)
#> X M Y
#> 1 0 0 1
#> 2 0 1 0
#> 3 0 NA 0
#> 4 NA NA NA
#> 5 1 NA 0
#> 6 NA NA NA
#> 7 1 NA 0
#> 8 1 NA 1
make_data(
model,
n = 8,
nodes = list(c("X", "Y"), c("X", "M")),
subsets = list(TRUE, "is.na(X)"),
n_steps = list(3, 2))
#> # A tibble: 2 × 5
#> node_names nodes n_steps probs subsets
#> <chr> <list> <dbl> <dbl> <chr>
#> 1 X, Y <chr [2]> 3 1 TRUE
#> 2 X, M <chr [2]> 2 1 is.na(X)
#> X M Y
#> 1 NA NA NA
#> 2 0 NA 0
#> 3 NA NA NA
#> 4 0 1 NA
#> 5 1 NA 0
#> 6 NA NA NA
#> 7 1 NA 0
#> 8 1 1 NA
# Example with probabilities at each step
make_data(
model,
n = 8,
nodes = list(c("X", "Y"), c("X", "M")),
subsets = list(TRUE, "is.na(X)"),
probs = list(.5, .2))
#> # A tibble: 2 × 5
#> node_names nodes n_steps probs subsets
#> <chr> <list> <lgl> <dbl> <chr>
#> 1 X, Y <chr [2]> NA 0.5 TRUE
#> 2 X, M <chr [2]> NA 0.2 is.na(X)
#> X M Y
#> 1 NA NA NA
#> 2 NA NA NA
#> 3 0 NA 0
#> 4 0 NA 1
#> 5 0 NA 1
#> 6 NA NA NA
#> 7 NA NA NA
#> 8 1 NA 1
# Example with given data
make_data(model, given = "X==1 & Y==1", n = 5)
#> X M Y
#> 1 1 0 1
#> 2 1 1 1
#> 3 1 1 1
#> 4 1 1 1
#> 5 1 1 1