Benchmark

Shixiang Wang

Central South University
wangshx@csu.edu.cn

2025-08-09

library(bregr)
#> Welcome to 'bregr' package!
#> =======================================================================
#> You are using bregr version 1.0.0.9000
#> 
#> Project home : https://github.com/WangLabCSU/bregr
#> Documentation: https://wanglabcsu.github.io/bregr/
#> Cite as      : arXiv:2110.14232
#> =======================================================================
#> 
library(microbenchmark)

Generate Simulated Data

# Simulated benchmark input: 1e7 standard-normal draws arranged into 1e3
# predictor columns named var1 ... var1000, followed by an independent
# standard-normal response column `y`. The seed is fixed so every run of the
# script sees the same data.
set.seed(2025L)
data = as.data.frame(
  matrix(
    rnorm(1e7),
    ncol = 1e3,
    dimnames = list(NULL, paste0("var", seq_len(1e3)))
  )
)
# Appended after the predictors, so `y` is always the last column.
data[["y"]] = rnorm(1e4)
# Keep the first `n` columns of `data` together with its last column.
# In this script the last column is the response `y`, so the result is an
# n-predictor regression data set.
sample_data = function(data, n) {
  last_col = ncol(data)
  keep = c(seq_len(n), last_col)
  data[, keep]
}

Functions

# Run the bregr pipeline with method "lm", using `y` as the response and every
# other column of `data` as a predictor.
#
# data:      data frame containing a column named "y".
# n_workers: forwarded unchanged to `br_pipeline()` as `run_parallel`.
# Returns whatever `br_pipeline()` returns.
lm_batch_bregr = function(data, n_workers = 1L) {
  predictors = setdiff(colnames(data), "y")
  br_pipeline(
    data = data,
    y = "y",
    x = predictors,
    method = "lm",
    run_parallel = n_workers
  )
}

# Same as `lm_batch_bregr()`, but with the `bregr.save_model` option switched
# on for the duration of the call.
#
# data:      data frame containing a column named "y".
# n_workers: forwarded to `lm_batch_bregr()`.
# Returns the value of `lm_batch_bregr(data, n_workers)`.
#
# Side effects are confined to the call: the caller's previous values of
# `bregr.save_model` and `bregr.path` are restored on exit (also on error),
# and the temporary model directory is removed.
lm_batch_bregr_save = function(data, n_workers = 1L) {
  # Save model files to a fresh path for every run. If tempdir() were used,
  # the models of all runs would be saved to the same directory, which may
  # cause issues.
  temp_path = tempfile()

  # options() returns the previous values, so they can be put back verbatim
  # (including "unset") instead of being overwritten with hard-coded ones.
  old_opts = options(bregr.save_model = TRUE, bregr.path = temp_path)
  on.exit(
    {
      options(old_opts)
      # unlink() is a silent no-op when the directory was never created,
      # so cleanup cannot raise a secondary error.
      unlink(temp_path, recursive = TRUE, force = TRUE)
    },
    add = TRUE
  )

  lm_batch_bregr(data, n_workers)
}

# Hand-written baseline: fit one univariable model `y ~ x` per predictor in a
# plain for loop, then tidy every fit twice.
#
# data: data frame containing a column named "y"; every other column is
#       treated as a predictor.
# Returns a list with
#   models       - fitted `lm` objects, named by predictor;
#   results      - row-bound `broom.helpers::tidy_plus_plus()` output;
#   tidy_results - row-bound `broom::tidy()` output.
# Both tables carry the predictor name in a "Focal_variable" id column.
lm_batch_for = function(data) {
  xs = setdiff(colnames(data), "y")

  # Fit inside the function body so each model keeps the same call expression
  # and formula environment regardless of how it is tidied afterwards.
  mods = list()
  for (x in xs) {
    mods[[x]] = lm(as.formula(glue::glue("y ~ {x}", x = x)), data = data)
  }

  # lapply() over the named model list keeps the predictor names, which
  # bind_rows() then writes into the id column.
  detailed = lapply(mods, function(mod) broom.helpers::tidy_plus_plus(mod))
  compact = lapply(mods, function(mod) broom::tidy(mod))

  list(
    models = mods,
    results = dplyr::bind_rows(detailed, .id = "Focal_variable"),
    tidy_results = dplyr::bind_rows(compact, .id = "Focal_variable")
  )
}

Benchmark

# Time every strategy on the first 10, 50, 250 and 1000 predictors of `data`
# (each subset also keeps the response column, see `sample_data()`).
# Five variants are measured per size:
#   batch_lm_bregr_*                - bregr pipeline with the default n_workers = 1L
#   batch_lm_bregr_parallel_*       - bregr pipeline with n_workers = 20
#   batch_lm_bregr_save_*           - as above, with bregr.save_model = TRUE
#   batch_lm_bregr_save_parallel_*  - model saving and n_workers = 20
#   batch_lm_for_*                  - hand-written for-loop baseline
# Note that `sample_data()` is called inside each timed expression, so the
# column subsetting is part of every measurement.
ev = microbenchmark(
  # 10 predictors
  batch_lm_bregr_10 = lm_batch_bregr(sample_data(data, 10)),
  batch_lm_bregr_parallel_10 = lm_batch_bregr(sample_data(data, 10), n_workers = 20),
  batch_lm_bregr_save_10 = lm_batch_bregr_save(sample_data(data, 10)),
  batch_lm_bregr_save_parallel_10 = lm_batch_bregr_save(sample_data(data, 10), n_workers = 20),
  batch_lm_for_10 = lm_batch_for(sample_data(data, 10)),
  
  # 50 predictors
  batch_lm_bregr_50 = lm_batch_bregr(sample_data(data, 50)),
  batch_lm_bregr_parallel_50 = lm_batch_bregr(sample_data(data, 50), n_workers = 20),
  batch_lm_bregr_save_50 = lm_batch_bregr_save(sample_data(data, 50)),
  batch_lm_bregr_save_parallel_50 = lm_batch_bregr_save(sample_data(data, 50), n_workers = 20),
  batch_lm_for_50 = lm_batch_for(sample_data(data, 50)),
  
  # 250 predictors
  batch_lm_bregr_250 = lm_batch_bregr(sample_data(data, 250)),
  batch_lm_bregr_parallel_250 = lm_batch_bregr(sample_data(data, 250), n_workers = 20),
  batch_lm_bregr_save_250 = lm_batch_bregr_save(sample_data(data, 250)),
  batch_lm_bregr_save_parallel_250 = lm_batch_bregr_save(sample_data(data, 250), n_workers = 20),
  batch_lm_for_250 = lm_batch_for(sample_data(data, 250)),
  
  
  # 1000 predictors (every predictor column of `data`)
  batch_lm_bregr_1000 = lm_batch_bregr(sample_data(data, 1000)),
  batch_lm_bregr_parallel_1000 = lm_batch_bregr(sample_data(data, 1000), n_workers = 20),
  batch_lm_bregr_save_1000 = lm_batch_bregr_save(sample_data(data, 1000)),
  batch_lm_bregr_save_parallel_1000 = lm_batch_bregr_save(sample_data(data, 1000), n_workers = 20),
  batch_lm_for_1000 = lm_batch_for(sample_data(data, 1000)),
  
  # Each of the 20 expressions above is evaluated 20 times.
  times = 20
) |> 
  # Messages and warnings raised while the expressions run are silenced.
  # NOTE(review): this also hides genuine warnings from the model fits.
  suppressMessages() |> 
  suppressWarnings()

# Persist the raw timings so the figure can be rebuilt without re-running
# the benchmark.
saveRDS(object = ev, file = "benchmark.rds")

# Build the timing plot, show it, and also write it to a 7 x 7 PDF.
p = ggplot2::autoplot(ev)
p

ggplot2::ggsave(
  filename = "benchmark_lm.pdf",
  plot = p,
  width = 7,
  height = 7
)