---
title: "Complete Example 2"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{example2}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
```{r setup, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
options(rmarkdown.html_vignette.check_title = FALSE)
```
## Program
"Complete Example 1" showed how to create a simple demographics table using
the *fmtr* package and *Tidyverse*. Here is the same table created
with only Base R and the *sassy* system of packages.
The data for this example has been included in the **fmtr** package as an
external data file. It may be accessed using the `system.file()` function
as shown below, or downloaded directly from the **fmtr** GitHub site
[here](https://raw.githubusercontent.com/dbosak01/fmtr/master/inst/extdata/DM.csv)
```{r eval=FALSE, echo=TRUE}
library(sassy)
# Prepare Log -------------------------------------------------------------
options("logr.autolog" = TRUE,
"logr.notes" = FALSE)
# Get temp location for log and report output
tmp <- tempdir()
# Open log
lf <- log_open(file.path(tmp, "example2.log"))
# Load and Prepare Data ---------------------------------------------------
sep("Prepare Data")
# Get path to sample data
pkg <- system.file("extdata", package = "fmtr")
# Define data library
libname(sdtm, pkg, "csv")
# Prepare data
put("Subset DM dataset")
dm_mod <- subset(sdtm$DM, ARM != "SCREEN FAILURE",
v(USUBJID, SEX, AGE, ARM)) |> put()
put("Get ARM population counts")
arm_pop <- proc_freq(dm_mod, tables = ARM,
output = long,
options = v(nocum, nopercent, nonobs))
# Create Format Catalog --------------------------------------------------
sep("Create format catalog")
fmts <- fcat(AGECAT = value(condition(x >= 18 & x <= 24, "18 to 24"),
condition(x >= 25 & x <= 44, "25 to 44"),
condition(x >= 45 & x <= 64, "45 to 64"),
condition(x >= 65, ">= 65")),
SEX = value(condition(is.na(x), "Missing", order = 3),
condition(x == "M", "Male", order = 1),
condition(x == "F", "Female", order = 2)),
VAR = c("AGE" = "Age",
"AGECAT" = "Age Group",
"SEX" = "Sex"))
numfmts <- fcat(N = "%d", MEAN = "%.1f", STD = "(%04.2f)", MEDIAN = "%d", Q1 = "%.1f",
Q3 = "%.1f", MIN = "%d", MAX = "%d", CNT = "%d", PCT = "(%4.1f%%)")
numlbls <- c(N = "n", MEANSD = "Mean (SD)", MEDIAN = "Median", Q1Q3 = "Q1 - Q3",
MINMAX = "Min - Max")
# Age Summary Block -------------------------------------------------------
sep("Create summary statistics for age")
age_block <- proc_means(dm_mod, stats = v(n, mean, std, median, q1, q3, min, max),
class = ARM, options = v(nway, notype, nofreq)) |>
datastep(format = numfmts,
keep = v(CLASS, VAR, N, MEANSD, MEDIAN, Q1Q3, MINMAX),
{
MEANSD <- fapply2(MEAN, STD)
Q1Q3 <- fapply2(Q1, Q3, sep = " - ")
MINMAX <- fapply2(MIN, MAX, sep = " - ")
}) |>
proc_transpose(id = CLASS, copy = VAR,
var = v(N, MEANSD, MEDIAN, Q1Q3, MINMAX),
name = "LABEL") |>
datastep({LABEL <- fapply(LABEL, numlbls)})
# Age Group Block ----------------------------------------------------------
sep("Create frequency counts for Age Group")
put("Create age group frequency counts")
ageg_block <- dm_mod |>
datastep({AGECAT <- fapply(AGE, fmts$AGECAT)}) |>
proc_freq(tables = AGECAT, by = ARM,
options = nonobs) |>
datastep(format = numfmts,
keep = v(VAR, BY, LABEL, CNTPCT),
{
LABEL <- CAT
CNTPCT <- fapply2(CNT, PCT)
}) |>
proc_transpose(var = CNTPCT, by = LABEL, copy = VAR, id = BY, options = noname)
put("Sort age groups as desired")
ageg_block$LABEL <- factor(ageg_block$LABEL, levels = levels(fmts$AGECAT))
ageg_block <- proc_sort(ageg_block, by = LABEL, as.character = TRUE)
# Sex Block ---------------------------------------------------------------
sep("Create frequency counts for SEX")
put("Create sex frequency counts")
sex_block <- dm_mod |>
datastep({SEX <- fapply(SEX, fmts$SEX)}) |>
proc_freq(tables = SEX, by = ARM,
options = nonobs) |>
datastep(format = numfmts,
keep = v(VAR, BY, LABEL, CNTPCT),
{
LABEL <- CAT
CNTPCT <- fapply2(CNT, PCT)
}) |>
proc_transpose(var = CNTPCT, by = LABEL, copy = VAR, id = BY, options = noname)
put("Sort age groups as desired")
sex_block$LABEL <- factor(sex_block$LABEL, levels = levels(fmts$SEX))
sex_block <- proc_sort(sex_block, by = LABEL, as.character = TRUE)
put("Combine blocks into final data frame")
final <- bind_rows(age_block, ageg_block, sex_block) |> put()
# Report ------------------------------------------------------------------
sep("Create and print report")
# Create Table
tbl <- create_table(final, first_row_blank = TRUE, borders = c("top", "bottom")) |>
column_defaults(from = `ARM A`, to = `ARM D`, align = "center", width = 1.25) |>
stub(vars = v(VAR, LABEL), "Variable", width = 2.5) |>
define(VAR, blank_after = TRUE, dedupe = TRUE, label = "Variable",
format = fmts$VAR,label_row = TRUE) |>
define(LABEL, indent = .25, label = "Demographic Category") |>
define(`ARM A`, label = "Treatment Group 1", n = arm_pop["ARM A"]) |>
define(`ARM B`, label = "Treatment Group 2", n = arm_pop["ARM B"]) |>
define(`ARM C`, label = "Treatment Group 3", n = arm_pop["ARM C"]) |>
define(`ARM D`, label = "Treatment Group 4", n = arm_pop["ARM D"])
rpt <- create_report(file.path(tmp, "output/example2.rtf"),
output_type = "RTF", font = "Arial") |>
set_margins(top = 1, bottom = 1) |>
page_header("Sponsor: Company", "Study: ABC") |>
titles("Table 1.0", bold = TRUE, blank_row = "none") |>
titles("Analysis of Demographic Characteristics",
"Safety Population") |>
add_content(tbl) |>
footnotes("Program: DM_Table.R",
"NOTE: Denominator based on number of non-missing responses.") |>
page_footer(paste0("Date Produced: ", fapply(Sys.time(), "%d%b%y %H:%M")),
right = "Page [pg] of [tpg]")
res <- write_report(rpt)
# Clean Up ----------------------------------------------------------------
sep("Clean Up")
# Close log
log_close()
# View report
# file.show(res$modified_path)
# View log
# file.show(lf)
```
## Output
Here is the report produced by the above sample program:
## Log
Here is the log produced by the above sample program:
```
=========================================================================
Log Path: C:/Users/dbosa/AppData/Local/Temp/RtmpKc23Rz/log/example2.log
Program Path: C:/Users/dbosa/Documents/.active-rstudio-document
Working Directory: C:/Projects/Archytas/Sassy/Code
User Name: dbosa
R Version: 4.3.2 (2023-10-31 ucrt)
Machine: SOCRATES x86-64
Operating System: Windows 10 x64 build 22621
Base Packages: stats graphics grDevices utils datasets methods base Other
Packages: tidylog_1.0.2 reporter_1.4.4 logr_1.3.5 sassy_1.2.1 lubridate_1.9.2
forcats_1.0.0 stringr_1.5.0 purrr_1.0.1 readr_2.1.4 tidyr_1.3.0 tibble_3.2.1
ggplot2_3.4.4 tidyverse_2.0.0 libr_1.2.8 dplyr_1.1.3 fmtr_1.6.2 procs_1.0.4
common_1.1.1
Log Start Time: 2024-01-07 18:26:25.636328
=========================================================================
=========================================================================
Prepare Data
=========================================================================
# library 'sdtm': 1 items
- attributes: csv not loaded
- path: C:/Users/dbosa/AppData/Local/R/win-library/4.3/fmtr/extdata
- items:
Name Extension Rows Cols Size LastModified
1 DM csv 87 24 45.5 Kb 2023-12-16 23:10:51
Subset DM dataset
# A tibble: 85 × 4
USUBJID SEX AGE ARM
1 ABC-01-049 M 39 ARM D
2 ABC-01-050 M 47 ARM B
3 ABC-01-051 M 34 ARM A
4 ABC-01-052 F 45 ARM C
5 ABC-01-053 F 26 ARM B
6 ABC-01-054 M 44 ARM D
7 ABC-01-055 F 47 ARM C
8 ABC-01-056 M 31 ARM A
9 ABC-01-113 M 74 ARM D
10 ABC-01-114 F 72 ARM B
# ℹ 75 more rows
# ℹ Use `print(n = ...)` to see more rows
Get ARM population counts
proc_freq: input data set 85 rows and 4 columns
tables: ARM
output: long
view: TRUE
output: 1 datasets
# A tibble: 1 × 6
VAR STAT `ARM A` `ARM B` `ARM C` `ARM D`
1 ARM CNT 20 21 21 23
=========================================================================
Create format catalog
=========================================================================
# A user-defined format: 4 conditions
Name Type Expression Label Order
1 obj U x >= 18 & x <= 24 18 to 24 NA
2 obj U x >= 25 & x <= 44 25 to 44 NA
3 obj U x >= 45 & x <= 64 45 to 64 NA
4 obj U x >= 65 >= 65 NA
# A user-defined format: 3 conditions
Name Type Expression Label Order
1 obj U is.na(x) Missing 3
2 obj U x == "M" Male 1
3 obj U x == "F" Female 2
# A format catalog: 3 formats
- $AGECAT: type U, 4 conditions
- $SEX: type U, 3 conditions
- $VAR: type V, 3 elements
# A format catalog: 10 formats
- $N: type S, "%d"
- $MEAN: type S, "%.1f"
- $STD: type S, "(%04.2f)"
- $MEDIAN: type S, "%d"
- $Q1: type S, "%.1f"
- $Q3: type S, "%.1f"
- $MIN: type S, "%d"
- $MAX: type S, "%d"
- $CNT: type S, "%d"
- $PCT: type S, "(%4.1f%%)"
=========================================================================
Create summary statistics for age
=========================================================================
proc_means: input data set 85 rows and 4 columns
class: ARM
var: AGE
stats: n mean std median q1 q3 min max
view: TRUE
output: 1 datasets
CLASS VAR N MEAN STD MEDIAN Q1 Q3 MIN MAX
1 ARM A AGE 20 53.15000 11.89991 52.5 47.5 60 31 73
2 ARM B AGE 21 47.38095 16.25877 46.0 35.0 61 22 73
3 ARM C AGE 21 45.71429 14.41923 46.0 38.0 53 19 71
4 ARM D AGE 23 49.73913 14.32486 48.0 39.0 62 21 75
datastep: columns decreased from 10 to 7
CLASS VAR N MEANSD MEDIAN Q1Q3 MINMAX
1 ARM A AGE 20 53.1 (11.90) 52.5 47.5 - 60.0 31 - 73
2 ARM B AGE 21 47.4 (16.26) 46.0 35.0 - 61.0 22 - 73
3 ARM C AGE 21 45.7 (14.42) 46.0 38.0 - 53.0 19 - 71
4 ARM D AGE 23 49.7 (14.32) 48.0 39.0 - 62.0 21 - 75
proc_transpose: input data set 4 rows and 7 columns
var: N MEANSD MEDIAN Q1Q3 MINMAX
id: CLASS
copy: VAR
name: LABEL
output dataset 5 rows and 6 columns
VAR LABEL ARM A ARM B ARM C ARM D
1 AGE N 20 21 21 23
2 AGE MEANSD 53.1 (11.90) 47.4 (16.26) 45.7 (14.42) 49.7 (14.32)
3 AGE MEDIAN 52.5 46.0 46.0 48.0
4 AGE Q1Q3 47.5 - 60.0 35.0 - 61.0 38.0 - 53.0 39.0 - 62.0
5 AGE MINMAX 31 - 73 22 - 73 19 - 71 21 - 75
datastep: columns started with 6 and ended with 6
VAR LABEL ARM A ARM B ARM C ARM D
1 AGE n 20 21 21 23
2 AGE Mean (SD) 53.1 (11.90) 47.4 (16.26) 45.7 (14.42) 49.7 (14.32)
3 AGE Median 52.5 46.0 46.0 48.0
4 AGE Q1 - Q3 47.5 - 60.0 35.0 - 61.0 38.0 - 53.0 39.0 - 62.0
5 AGE Min - Max 31 - 73 22 - 73 19 - 71 21 - 75
=========================================================================
Create frequency counts for Age Group
=========================================================================
Create age group frequency counts
datastep: columns increased from 4 to 5
# A tibble: 85 × 5
USUBJID SEX AGE ARM AGECAT
1 ABC-01-049 M 39 ARM D 25 to 44
2 ABC-01-050 M 47 ARM B 45 to 64
3 ABC-01-051 M 34 ARM A 25 to 44
4 ABC-01-052 F 45 ARM C 45 to 64
5 ABC-01-053 F 26 ARM B 25 to 44
6 ABC-01-054 M 44 ARM D 25 to 44
7 ABC-01-055 F 47 ARM C 45 to 64
8 ABC-01-056 M 31 ARM A 25 to 44
9 ABC-01-113 M 74 ARM D >= 65
10 ABC-01-114 F 72 ARM B >= 65
# ℹ 75 more rows
# ℹ Use `print(n = ...)` to see more rows
proc_freq: input data set 85 rows and 5 columns
tables: AGECAT
by: ARM
view: TRUE
output: 1 datasets
# A tibble: 16 × 5
BY VAR CAT CNT PCT
1 ARM A AGECAT >= 65 3 15
2 ARM A AGECAT 18 to 24 0 0
3 ARM A AGECAT 25 to 44 4 20
4 ARM A AGECAT 45 to 64 13 65
5 ARM B AGECAT >= 65 5 23.8
6 ARM B AGECAT 18 to 24 1 4.76
7 ARM B AGECAT 25 to 44 8 38.1
8 ARM B AGECAT 45 to 64 7 33.3
9 ARM C AGECAT >= 65 2 9.52
10 ARM C AGECAT 18 to 24 3 14.3
11 ARM C AGECAT 25 to 44 4 19.0
12 ARM C AGECAT 45 to 64 12 57.1
13 ARM D AGECAT >= 65 3 13.0
14 ARM D AGECAT 18 to 24 1 4.35
15 ARM D AGECAT 25 to 44 7 30.4
16 ARM D AGECAT 45 to 64 12 52.2
datastep: columns decreased from 5 to 4
# A tibble: 16 × 4
VAR BY LABEL CNTPCT
1 AGECAT ARM A >= 65 3 (15.0%)
2 AGECAT ARM A 18 to 24 0 ( 0.0%)
3 AGECAT ARM A 25 to 44 4 (20.0%)
4 AGECAT ARM A 45 to 64 13 (65.0%)
5 AGECAT ARM B >= 65 5 (23.8%)
6 AGECAT ARM B 18 to 24 1 ( 4.8%)
7 AGECAT ARM B 25 to 44 8 (38.1%)
8 AGECAT ARM B 45 to 64 7 (33.3%)
9 AGECAT ARM C >= 65 2 ( 9.5%)
10 AGECAT ARM C 18 to 24 3 (14.3%)
11 AGECAT ARM C 25 to 44 4 (19.0%)
12 AGECAT ARM C 45 to 64 12 (57.1%)
13 AGECAT ARM D >= 65 3 (13.0%)
14 AGECAT ARM D 18 to 24 1 ( 4.3%)
15 AGECAT ARM D 25 to 44 7 (30.4%)
16 AGECAT ARM D 45 to 64 12 (52.2%)
proc_transpose: input data set 16 rows and 4 columns
by: LABEL
var: CNTPCT
id: BY
copy: VAR
name: NAME
output dataset 4 rows and 6 columns
# A tibble: 4 × 6
VAR LABEL `ARM A` `ARM B` `ARM C` `ARM D`
1 AGECAT >= 65 3 (15.0%) 5 (23.8%) 2 ( 9.5%) 3 (13.0%)
2 AGECAT 18 to 24 0 ( 0.0%) 1 ( 4.8%) 3 (14.3%) 1 ( 4.3%)
3 AGECAT 25 to 44 4 (20.0%) 8 (38.1%) 4 (19.0%) 7 (30.4%)
4 AGECAT 45 to 64 13 (65.0%) 7 (33.3%) 12 (57.1%) 12 (52.2%)
Sort age groups as desired
proc_sort: input data set 4 rows and 6 columns
by: LABEL
keep: VAR LABEL ARM A ARM B ARM C ARM D
order: a
output data set 4 rows and 6 columns
# A tibble: 4 × 6
VAR LABEL `ARM A` `ARM B` `ARM C` `ARM D`
1 AGECAT 18 to 24 0 ( 0.0%) 1 ( 4.8%) 3 (14.3%) 1 ( 4.3%)
2 AGECAT 25 to 44 4 (20.0%) 8 (38.1%) 4 (19.0%) 7 (30.4%)
3 AGECAT 45 to 64 13 (65.0%) 7 (33.3%) 12 (57.1%) 12 (52.2%)
4 AGECAT >= 65 3 (15.0%) 5 (23.8%) 2 ( 9.5%) 3 (13.0%)
=========================================================================
Create frequency counts for SEX
=========================================================================
Create sex frequency counts
datastep: columns started with 4 and ended with 4
# A tibble: 85 × 4
USUBJID SEX AGE ARM
1 ABC-01-049 Male 39 ARM D
2 ABC-01-050 Male 47 ARM B
3 ABC-01-051 Male 34 ARM A
4 ABC-01-052 Female 45 ARM C
5 ABC-01-053 Female 26 ARM B
6 ABC-01-054 Male 44 ARM D
7 ABC-01-055 Female 47 ARM C
8 ABC-01-056 Male 31 ARM A
9 ABC-01-113 Male 74 ARM D
10 ABC-01-114 Female 72 ARM B
# ℹ 75 more rows
# ℹ Use `print(n = ...)` to see more rows
proc_freq: input data set 85 rows and 4 columns
tables: SEX
by: ARM
view: TRUE
output: 1 datasets
# A tibble: 8 × 5
BY VAR CAT CNT PCT
1 ARM A SEX Female 5 25
2 ARM A SEX Male 15 75
3 ARM B SEX Female 11 52.4
4 ARM B SEX Male 10 47.6
5 ARM C SEX Female 9 42.9
6 ARM C SEX Male 12 57.1
7 ARM D SEX Female 7 30.4
8 ARM D SEX Male 16 69.6
datastep: columns decreased from 5 to 4
# A tibble: 8 × 4
VAR BY LABEL CNTPCT
1 SEX ARM A Female 5 (25.0%)
2 SEX ARM A Male 15 (75.0%)
3 SEX ARM B Female 11 (52.4%)
4 SEX ARM B Male 10 (47.6%)
5 SEX ARM C Female 9 (42.9%)
6 SEX ARM C Male 12 (57.1%)
7 SEX ARM D Female 7 (30.4%)
8 SEX ARM D Male 16 (69.6%)
proc_transpose: input data set 8 rows and 4 columns
by: LABEL
var: CNTPCT
id: BY
copy: VAR
name: NAME
output dataset 2 rows and 6 columns
# A tibble: 2 × 6
VAR LABEL `ARM A` `ARM B` `ARM C` `ARM D`
1 SEX Female 5 (25.0%) 11 (52.4%) 9 (42.9%) 7 (30.4%)
2 SEX Male 15 (75.0%) 10 (47.6%) 12 (57.1%) 16 (69.6%)
Sort age groups as desired
proc_sort: input data set 2 rows and 6 columns
by: LABEL
keep: VAR LABEL ARM A ARM B ARM C ARM D
order: a
output data set 2 rows and 6 columns
# A tibble: 2 × 6
VAR LABEL `ARM A` `ARM B` `ARM C` `ARM D`
1 SEX Male 15 (75.0%) 10 (47.6%) 12 (57.1%) 16 (69.6%)
2 SEX Female 5 (25.0%) 11 (52.4%) 9 (42.9%) 7 (30.4%)
Combine blocks into final data frame
VAR LABEL ARM A ARM B ARM C ARM D
1 AGE n 20 21 21 23
2 AGE Mean (SD) 53.1 (11.90) 47.4 (16.26) 45.7 (14.42) 49.7 (14.32)
3 AGE Median 52.5 46.0 46.0 48.0
4 AGE Q1 - Q3 47.5 - 60.0 35.0 - 61.0 38.0 - 53.0 39.0 - 62.0
5 AGE Min - Max 31 - 73 22 - 73 19 - 71 21 - 75
6 AGECAT 18 to 24 0 ( 0.0%) 1 ( 4.8%) 3 (14.3%) 1 ( 4.3%)
7 AGECAT 25 to 44 4 (20.0%) 8 (38.1%) 4 (19.0%) 7 (30.4%)
8 AGECAT 45 to 64 13 (65.0%) 7 (33.3%) 12 (57.1%) 12 (52.2%)
9 AGECAT >= 65 3 (15.0%) 5 (23.8%) 2 ( 9.5%) 3 (13.0%)
10 SEX Male 15 (75.0%) 10 (47.6%) 12 (57.1%) 16 (69.6%)
11 SEX Female 5 (25.0%) 11 (52.4%) 9 (42.9%) 7 (30.4%)
=========================================================================
Create and print report
=========================================================================
# A report specification: 1 pages
- file_path: 'C:\Users\dbosa\AppData\Local\Temp\RtmpKc23Rz/output/example2.rtf'
- output_type: RTF
- units: inches
- orientation: landscape
- margins: top 1 bottom 1 left 1 right 1
- line size/count: 9/36
- page_header: left=Sponsor: Company right=Study: ABC
- title 1: 'Table 1.0'
- title 2: 'Analysis of Demographic Characteristics'
- title 3: 'Safety Population'
- footnote 1: 'Program: DM_Table.R'
- footnote 2: 'NOTE: Denominator based on number of non-missing responses.'
- page_footer: left=Date Produced: 07Jan24 18:26 center= right=Page [pg] of [tpg]
- content:
# A table specification:
- data: data.frame 'final' 11 rows 6 cols
- show_cols: all
- use_attributes: all
- stub: VAR LABEL 'Variable' width=2.5 align='left'
- define: VAR 'Variable' dedupe='TRUE'
- define: LABEL 'Demographic Category'
- define: ARM A 'Treatment Group 1'
- define: ARM B 'Treatment Group 2'
- define: ARM C 'Treatment Group 3'
- define: ARM D 'Treatment Group 4'
=========================================================================
Clean Up
=========================================================================
=========================================================================
Log End Time: 2024-01-07 18:26:27.426253
Log Elapsed Time: 0 00:00:01
=========================================================================
```