Skip to contents

Scramble the values of several variables in a data frame.

Usage

scramble_variables(data, cols, .groups = NULL)

Arguments

data

a data frame

cols

a vector of column names or indices to scramble

.groups

a vector of grouping column names or indices; scrambling is done within each group (default is NULL, meaning no grouping)

Value

A data frame with the specified columns scrambled. If grouping is specified, scrambling is done within each group.

Examples


df <- data.frame(x = 1:6, y = letters[1:6], group = c("A", "A", "A", "B", "B", "B"))

set.seed(123)
# Example without grouping. Variables are scrambled across the entire data frame.
df |> scramble_variables(c("x", "y"))
#>   x y group
#> 1 3 e     A
#> 2 6 d     A
#> 3 2 b     A
#> 4 4 f     B
#> 5 5 a     B
#> 6 1 c     B

# Example with grouping. The variable is scrambled only within groups.

df |> scramble_variables("y", .groups = "group")
#> # A tibble: 6 × 3
#>       x y     group
#>   <int> <chr> <chr>
#> 1     1 c     A    
#> 2     2 b     A    
#> 3     3 a     A    
#> 4     4 f     B    
#> 5     5 d     B    
#> 6     6 e     B    

# Example with the 'williams' dataset

data(williams)
williams |> scramble_variables(c("ecology", "age"))
#> # A tibble: 112 × 25
#>    subject   SexUnres_1 SexUnres_2 SexUnres_3 SexUnres_4_r SexUnres_5_r Impuls_1
#>    <chr>          <dbl>      <dbl>      <dbl>        <dbl>        <dbl>    <dbl>
#>  1 A30MP4LX…          5          3          2            3            2        3
#>  2 A16X5FB3…          7          7          7            4            2        6
#>  3 A1E9D1OT…          2          4          6            3            3        3
#>  4 A16FPOYD…          5          4          5            3            3        4
#>  5 A11NOTVH…          5          5          6            3            3        5
#>  6 A3TDR6MX…          5          6          6            3            2        5
#>  7 A3OD4F0S…          5          6          5            3            4        5
#>  8 A123PBQD…          4          6          5            2            3        6
#>  9 A25NGIY5…          6          5          6            3            2        6
#> 10 A11WCFPJ…          4          4          4            1            1        1
#> # ℹ 102 more rows
#> # ℹ 18 more variables: Impuls_2_r <dbl>, Impul_3_r <dbl>, Opport_1 <dbl>,
#> #   Opport_2 <dbl>, Opport_3 <dbl>, Opport_4 <dbl>, Opport_5 <dbl>,
#> #   Opport_6_r <dbl>, InvEdu_1_r <dbl>, InvEdu_2_r <dbl>, InvChild_1 <dbl>,
#> #   InvChild_2_r <dbl>, age <dbl>, gender <dbl>, ecology <chr>,
#> #   duration_in_seconds <dbl>, attention_1 <dbl>, attention_2 <dbl>

williams |> scramble_variables(1:5)
#> # A tibble: 112 × 25
#>    subject   SexUnres_1 SexUnres_2 SexUnres_3 SexUnres_4_r SexUnres_5_r Impuls_1
#>    <chr>          <dbl>      <dbl>      <dbl>        <dbl>        <dbl>    <dbl>
#>  1 A2RBF3II…          4          4          5            4            2        3
#>  2 A2UU2XVE…          3          4          5            3            2        6
#>  3 A3I40B0F…          3          7          3            7            3        3
#>  4 A2M0DL7I…          4          4          7            4            3        4
#>  5 A1JG342P…          4          1          3            4            3        5
#>  6 AA9Y4BEM…          5          5          4            3            2        5
#>  7 A3TDR6MX…          2          6          5            3            4        5
#>  8 AEOT74WA…          3          7          7            2            3        6
#>  9 A3VJHYXZ…          6          5          6            1            2        6
#> 10 A297Q7ZL…          5          4          6            2            1        1
#> # ℹ 102 more rows
#> # ℹ 18 more variables: Impuls_2_r <dbl>, Impul_3_r <dbl>, Opport_1 <dbl>,
#> #   Opport_2 <dbl>, Opport_3 <dbl>, Opport_4 <dbl>, Opport_5 <dbl>,
#> #   Opport_6_r <dbl>, InvEdu_1_r <dbl>, InvEdu_2_r <dbl>, InvChild_1 <dbl>,
#> #   InvChild_2_r <dbl>, age <dbl>, gender <dbl>, ecology <chr>,
#> #   duration_in_seconds <dbl>, attention_1 <dbl>, attention_2 <dbl>
williams |> scramble_variables(c("ecology", "age"), .groups = "gender")
#> # A tibble: 112 × 25
#>    subject   SexUnres_1 SexUnres_2 SexUnres_3 SexUnres_4_r SexUnres_5_r Impuls_1
#>    <chr>          <dbl>      <dbl>      <dbl>        <dbl>        <dbl>    <dbl>
#>  1 A30MP4LX…          5          3          2            3            2        3
#>  2 A16X5FB3…          7          7          7            4            2        6
#>  3 A1E9D1OT…          2          4          6            3            3        3
#>  4 A123PBQD…          4          6          5            2            3        6
#>  5 A35G4ABP…          5          5          7            3            2        6
#>  6 A1P6OXEJ…          3          2          3            3            2        2
#>  7 A1QVNF19…          6          3          3            2            6        7
#>  8 A3CW3HM9…          3          2          3            3            3        5
#>  9 A2PPRPW1…          2          1          3            4            3        5
#> 10 AVFSBC7X…          6          5          4            2            1        4
#> # ℹ 102 more rows
#> # ℹ 18 more variables: Impuls_2_r <dbl>, Impul_3_r <dbl>, Opport_1 <dbl>,
#> #   Opport_2 <dbl>, Opport_3 <dbl>, Opport_4 <dbl>, Opport_5 <dbl>,
#> #   Opport_6_r <dbl>, InvEdu_1_r <dbl>, InvEdu_2_r <dbl>, InvChild_1 <dbl>,
#> #   InvChild_2_r <dbl>, age <dbl>, gender <dbl>, ecology <chr>,
#> #   duration_in_seconds <dbl>, attention_1 <dbl>, attention_2 <dbl>

# The function is compatible with column indices

williams |> scramble_variables(c(1, 2), .groups = c(3))
#> # A tibble: 112 × 25
#>    subject   SexUnres_1 SexUnres_2 SexUnres_3 SexUnres_4_r SexUnres_5_r Impuls_1
#>    <chr>          <dbl>      <dbl>      <dbl>        <dbl>        <dbl>    <dbl>
#>  1 A2541C8M…          3          1          3            4            3        5
#>  2 A1WTUFTR…          1          1          4            1            2        3
#>  3 A598UDLZ…          6          1          4            2            1        2
#>  4 A2PPRPW1…          3          1          1            3            1        2
#>  5 A2OVOVZB…          1          1          2            2            2        3
#>  6 ASF5V3K4…          2          1          7            4            3        7
#>  7 A1PUQY8X…          2          1          2            2            1        2
#>  8 A26UIS59…          3          1          2            2            1        1
#>  9 A1DNHN32…          2          2          3            3            2        2
#> 10 AJ72AZV1…          1          2          3            3            3        5
#> # ℹ 102 more rows
#> # ℹ 18 more variables: Impuls_2_r <dbl>, Impul_3_r <dbl>, Opport_1 <dbl>,
#> #   Opport_2 <dbl>, Opport_3 <dbl>, Opport_4 <dbl>, Opport_5 <dbl>,
#> #   Opport_6_r <dbl>, InvEdu_1_r <dbl>, InvEdu_2_r <dbl>, InvChild_1 <dbl>,
#> #   InvChild_2_r <dbl>, age <dbl>, gender <dbl>, ecology <chr>,
#> #   duration_in_seconds <dbl>, attention_1 <dbl>, attention_2 <dbl>