Skip to contents

Make data

Usage

make_data(
  model,
  n = NULL,
  parameters = NULL,
  param_type = NULL,
  nodes = NULL,
  n_steps = NULL,
  probs = NULL,
  subsets = TRUE,
  complete_data = NULL,
  given = NULL,
  verbose = TRUE,
  ...
)

Arguments

model

A causal_model. A model object generated by make_model.

n

Non-negative integer. Number of observations. If not provided, it is inferred from the largest value in n_steps.

parameters

A vector of real numbers in [0,1]. Values of parameters to specify (optional). By default, parameters is drawn from model$parameters_df.

param_type

A character. String specifying the type of parameters to make ("flat", "prior_mean", "posterior_mean", "prior_draw", "posterior_draw", "define"). With param_type set to "define", use arguments to be passed to make_priors; otherwise, "flat" sets equal probabilities on each nodal type in each parameter set, while "prior_mean", "prior_draw", "posterior_mean", and "posterior_draw" take parameters as the means of, or as draws from, the prior or posterior.

nodes

A list. Which nodes are to be observed at each step. If NULL, all nodes are observed.

n_steps

A list. Number of observations to be made at each step.

probs

A list. Observation probabilities at each step.

subsets

A list. Strata within which observations are to be observed at each step. TRUE for all, otherwise an expression that evaluates to a logical condition.

complete_data

A data.frame. Dataset with complete observations. Optional.

given

A string specifying known values on nodes, e.g. "X==1 & Y==1".

verbose

Logical. If TRUE prints step schedule.

...

Additional arguments that can be passed to make_parameters.

Value

A data.frame with simulated data.

Details

Note that the default behavior does not take into account whether a node has already been observed when determining whether to select it. One can, however, specifically request observation of nodes that have not been previously observed.

Examples


# Simple draws
model <- make_model("X -> M -> Y")
make_data(model)
#>   X M Y
#> 1 1 0 1
make_data(model, n = 3, nodes = c("X","Y"))
#> # A tibble: 1 × 5
#>   node_names nodes     n_steps probs subsets
#>   <chr>      <list>      <dbl> <dbl> <lgl>  
#> 1 X, Y       <chr [2]>       3     1 TRUE   
#>   X  M Y
#> 1 0 NA 0
#> 2 0 NA 1
#> 3 1 NA 0
make_data(model, n = 3, param_type = "prior_draw")
#>   X M Y
#> 1 0 1 0
#> 2 0 1 0
#> 3 0 1 1
make_data(model, n = 10, param_type = "define", parameters =  0:9)
#>    X M Y
#> 1  1 0 0
#> 2  1 0 0
#> 3  1 0 0
#> 4  1 1 0
#> 5  1 1 0
#> 6  1 1 0
#> 7  1 1 0
#> 8  1 1 1
#> 9  1 1 1
#> 10 1 1 1

# Data Strategies
# A strategy in which X, Y are observed for sure and M is observed
# with 50% probability for X=1, Y=0 cases

model <- make_model("X -> M -> Y")
make_data(
  model,
  n = 8,
  nodes = list(c("X", "Y"), "M"),
  probs = list(1, .5),
  subsets = list(TRUE, "X==1 & Y==0"))
#> # A tibble: 2 × 5
#>   node_names nodes     n_steps probs subsets    
#>   <chr>      <list>    <lgl>   <dbl> <chr>      
#> 1 X, Y       <chr [2]> NA        1   TRUE       
#> 2 M          <chr [1]> NA        0.5 X==1 & Y==0
#>   X  M Y
#> 1 0 NA 0
#> 2 0 NA 1
#> 3 0 NA 0
#> 4 0 NA 1
#> 5 1 NA 0
#> 6 1  0 0
#> 7 1 NA 1
#> 8 1 NA 1

# n not provided but inferred from largest n_step (not from sum of n_steps)
make_data(
  model,
  nodes = list(c("X", "Y"), "M"),
  n_steps = list(5, 2))
#> # A tibble: 2 × 5
#>   node_names nodes     n_steps probs subsets
#>   <chr>      <list>      <dbl> <dbl> <lgl>  
#> 1 X, Y       <chr [2]>       5     1 TRUE   
#> 2 M          <chr [1]>       2     1 TRUE   
#>   X  M Y
#> 1 0 NA 0
#> 2 0 NA 0
#> 3 0  0 1
#> 4 0  1 1
#> 5 1 NA 1

# Wide then deep
  make_data(
  model,
  n = 8,
  nodes = list(c("X", "Y"), "M"),
  subsets = list(TRUE, "!is.na(X) & !is.na(Y)"),
  n_steps = list(6, 2))
#> # A tibble: 2 × 5
#>   node_names nodes     n_steps probs subsets              
#>   <chr>      <list>      <dbl> <dbl> <chr>                
#> 1 X, Y       <chr [2]>       6     1 TRUE                 
#> 2 M          <chr [1]>       2     1 !is.na(X) & !is.na(Y)
#>    X  M  Y
#> 1  0  0  1
#> 2  0  1  0
#> 3  0 NA  0
#> 4 NA NA NA
#> 5  1 NA  0
#> 6 NA NA NA
#> 7  1 NA  0
#> 8  1 NA  1


make_data(
  model,
  n = 8,
  nodes = list(c("X", "Y"), c("X", "M")),
  subsets = list(TRUE, "is.na(X)"),
  n_steps = list(3, 2))
#> # A tibble: 2 × 5
#>   node_names nodes     n_steps probs subsets 
#>   <chr>      <list>      <dbl> <dbl> <chr>   
#> 1 X, Y       <chr [2]>       3     1 TRUE    
#> 2 X, M       <chr [2]>       2     1 is.na(X)
#>    X  M  Y
#> 1 NA NA NA
#> 2  0 NA  0
#> 3 NA NA NA
#> 4  0  1 NA
#> 5  1 NA  0
#> 6 NA NA NA
#> 7  1 NA  0
#> 8  1  1 NA

# Example with probabilities at each step

make_data(
  model,
  n = 8,
  nodes = list(c("X", "Y"), c("X", "M")),
  subsets = list(TRUE, "is.na(X)"),
  probs = list(.5, .2))
#> # A tibble: 2 × 5
#>   node_names nodes     n_steps probs subsets 
#>   <chr>      <list>    <lgl>   <dbl> <chr>   
#> 1 X, Y       <chr [2]> NA        0.5 TRUE    
#> 2 X, M       <chr [2]> NA        0.2 is.na(X)
#>    X  M  Y
#> 1 NA NA NA
#> 2 NA NA NA
#> 3  0 NA  0
#> 4  0 NA  1
#> 5  0 NA  1
#> 6 NA NA NA
#> 7 NA NA NA
#> 8  1 NA  1

# Example with given data
make_data(model, given = "X==1 & Y==1", n = 5)
#>   X M Y
#> 1 1 0 1
#> 2 1 1 1
#> 3 1 1 1
#> 4 1 1 1
#> 5 1 1 1