Analysis class to manage an analysis folder
PMAnalysis.Rd
This object can be used to manage an analysis folder within a project. Each analysis contains code, outputs, intermediate results, and logs.
Public fields
path: Full path to the analysis's folder
name: Name of the analysis (folder name within analyses/)
project_path: Full path to the project's folder (if created from project)
Methods
Method new()
Create a PMAnalysis object
Usage
PMAnalysis$new(project = NULL, name = NULL, path = NULL)
Method get_artifact()
Get an artifact (output file) from another analysis by ID. Searches for files with the given ID (filename without extension) in analysis output directories.
Arguments
id: Character. The artifact ID (filename without extension).
analysis_name: Character. Optional name of the analysis to search in. If not provided, uses the current analysis's name. If explicitly set to NULL, searches all analyses and fails unless exactly one match is found.
Examples
folder <- withr::local_tempdir()
pm <- pm_create_project(folder)
analysis1 <- pm$create_analysis("data_preparation")
analysis2 <- pm$create_analysis("modeling")
# Create a test output file in analysis1
output <- analysis1$get_output_path("results.csv", type = "table")
write.csv(data.frame(x = 1:5), output$path)
# Get artifact from analysis2 (which gets it from analysis1)
artifact <- analysis2$get_artifact("results", analysis_name = "data_preparation")
# Get artifact from current analysis (default behavior)
artifact <- analysis1$get_artifact("results")
# Get artifact without specifying analysis (if unique across all analyses)
artifact <- analysis2$get_artifact("results", analysis_name = NULL)
Method get_intermediate_artifact()
Get an intermediate artifact from the current analysis's intermediate folder.
Searches for existing files with the given ID (filename without extension) in the intermediate directory.
If an existing file is found, returns it. If no existing file is found, returns the path
via get_output_path() (the file may not exist yet).
Examples
folder <- withr::local_tempdir()
pm <- pm_create_project(folder)
analysis <- pm$create_analysis("data_preparation")
# Get artifact from current analysis's intermediate folder
# If file exists, returns it; otherwise returns path for new file
artifact <- analysis$get_intermediate_artifact("temp_data")
if (artifact$exists()) {
data <- artifact$read()
} else {
# File doesn't exist yet, can write to it
artifact$write(data.frame(x = 1:5))
}
Method list_outputs()
List all output files in the analysis. Returns a list of PMData objects for all files in the outputs or intermediate directory.
Arguments
intermediate: Logical. If TRUE, lists files in intermediate/ folder; if FALSE, in outputs/ folder.
Returns
A list of PMData objects, one for each file found.
Each object has:
id: The file name without extension
path: The full absolute path to the file
Examples
folder <- withr::local_tempdir()
pm <- pm_create_project(folder)
analysis <- pm$create_analysis("my_analysis")
# Create some output files
output1 <- analysis$get_output_path("results.csv", type = "table")
output2 <- analysis$get_output_path("plot.png", type = "figure")
# List all outputs
outputs <- analysis$list_outputs()
length(outputs) # Number of output files
# List intermediate files
intermediates <- analysis$list_outputs(intermediate = TRUE)
Method get_output_path()
Get output path for a file, returning a PMData object. Also supports subfolders, using either Unix-style or Windows-style delimiters ("/" and "\").
Arguments
name: Character. Name of the output file (with or without extension).
type: Character. Optional type of output (table, object, image, figure, parquet, csv). If provided and name has no extension, an appropriate extension will be added. If provided and name has an extension, the extension will be validated against the type.
intermediate: Logical. If TRUE, file goes in intermediate/ folder; if FALSE, in outputs/ folder.
Returns
A PMData object with:
id: The file name without extension
path: The full absolute path to the output file
Examples
folder <- withr::local_tempdir()
pm <- pm_create_project(folder)
analysis <- pm$create_analysis("my_analysis")
# Get output path for a CSV file
output <- analysis$get_output_path("results.csv", type = "table")
output$id # "results"
output$path # full path to results.csv in outputs/
# Get intermediate path without extension (will add .parquet for table type)
intermediate <- analysis$get_output_path("temp_data", type = "table", intermediate = TRUE)
intermediate$id # "temp_data"
intermediate$path # full path to temp_data.parquet in intermediate/
# Get output path with nested folders
output2 <- analysis$get_output_path("unique\\complex\\structure.rds")
output2$id # "unique/complex/structure"
output2$path
Method run_in_slurm()
Run a function in SLURM. Submits the function as a SLURM job and returns a PMSlurmRun object that can be used to check status and retrieve results.
Usage
PMAnalysis$run_in_slurm(fun, ..., result_id = NULL, config = list())
Arguments
fun: Function. The function to run in SLURM.
...: Additional arguments to pass to the function (can be positional or named).
result_id: Character. Optional ID for the result file (default: "slurm_result"). Must be provided by name if function arguments are passed positionally.
config: List. SLURM configuration with optional elements:
job_name: Character. Name for the SLURM job (default: analysis name).
time_limit: Character. SLURM time limit (default: "01:00:00").
memory: Character. SLURM memory limit (default: "4G").
cpus: Integer. Number of CPUs to request (default: 1).
slurm_flags: Character. Additional SLURM flags (default: "").
modules: Character vector. Modules to load (default: NULL).
store_image: Logical, NULL, or vector. TRUE = save full workspace (default); FALSE or NULL = save no environment; a character vector or named list (or mix) = save only those objects (strings = names to look up in the calling environment, named elements = name and value as-is).
Details
This method:
Checks if SLURM is available
Creates an R script that runs the function and saves results
Creates a SLURM script from the template
Submits the job
Returns a PMSlurmRun object for monitoring
Results are stored in the intermediate folder. All temporary files (function files, scripts) are stored in a temporary directory.
Function arguments can be passed positionally or by name. If using positional
arguments, result_id and config must be provided by name.
Examples
\dontrun{
analysis <- pm$get_analysis("my_analysis")
# With named arguments
slurm_run <- analysis$run_in_slurm(function(x, y) {
result <- compute_something(x, y)
return(result)
}, x = 10, y = 20)
# With positional arguments
slurm_run <- analysis$run_in_slurm(function(x) {
return(2 * x)
}, 10, result_id = "my_result")
# Check if done
slurm_run$is_done()
# Get results (with optional timeout to wait)
results <- slurm_run$get_results(timeout = 60)
}
Examples
# Create a project and analysis
folder <- withr::local_tempdir()
pm <- pm_create_project(folder)
# Create a new analysis
analysis <- pm$create_analysis("data_preparation")
analysis
#> PMAnalysis:
#> Name: data_preparation
#> Path: /private/var/folders/0t/mvk3x4hx0pl31l5lcl11krcc0000gn/T/RtmpYlxyhU/file1293372abaf25/analyses/data_preparation
#> Project: /private/var/folders/0t/mvk3x4hx0pl31l5lcl11krcc0000gn/T/RtmpYlxyhU/file1293372abaf25
# Load an existing analysis from project
analysis <- pm$get_analysis("data_preparation")
# Load an existing analysis from path
analysis <- PMAnalysis$new(path = file.path(folder, "analyses", "data_preparation"))
## ------------------------------------------------
## Method `PMAnalysis$get_artifact`
## ------------------------------------------------
folder <- withr::local_tempdir()
pm <- pm_create_project(folder)
analysis1 <- pm$create_analysis("data_preparation")
analysis2 <- pm$create_analysis("modeling")
# Create a test output file in analysis1
output <- analysis1$get_output_path("results.csv", type = "table")
write.csv(data.frame(x = 1:5), output$path)
# Get artifact from analysis2 (which gets it from analysis1)
artifact <- analysis2$get_artifact("results", analysis_name = "data_preparation")
# Get artifact from current analysis (default behavior)
artifact <- analysis1$get_artifact("results")
# Get artifact without specifying analysis (if unique across all analyses)
artifact <- analysis2$get_artifact("results", analysis_name = NULL)
## ------------------------------------------------
## Method `PMAnalysis$get_intermediate_artifact`
## ------------------------------------------------
folder <- withr::local_tempdir()
pm <- pm_create_project(folder)
analysis <- pm$create_analysis("data_preparation")
# Get artifact from current analysis's intermediate folder
# If file exists, returns it; otherwise returns path for new file
artifact <- analysis$get_intermediate_artifact("temp_data")
if (artifact$exists()) {
data <- artifact$read()
} else {
# File doesn't exist yet, can write to it
artifact$write(data.frame(x = 1:5))
}
## ------------------------------------------------
## Method `PMAnalysis$list_outputs`
## ------------------------------------------------
folder <- withr::local_tempdir()
pm <- pm_create_project(folder)
analysis <- pm$create_analysis("my_analysis")
# Create some output files
output1 <- analysis$get_output_path("results.csv", type = "table")
output2 <- analysis$get_output_path("plot.png", type = "figure")
# List all outputs
outputs <- analysis$list_outputs()
length(outputs) # Number of output files
#> [1] 0
# List intermediate files
intermediates <- analysis$list_outputs(intermediate = TRUE)
## ------------------------------------------------
## Method `PMAnalysis$get_output_path`
## ------------------------------------------------
folder <- withr::local_tempdir()
pm <- pm_create_project(folder)
analysis <- pm$create_analysis("my_analysis")
# Get output path for a CSV file
output <- analysis$get_output_path("results.csv", type = "table")
output$id # "results"
#> [1] "results"
output$path # full path to results.csv in outputs/
#> [1] "/private/var/folders/0t/mvk3x4hx0pl31l5lcl11krcc0000gn/T/RtmpYlxyhU/file129334821751a/analyses/my_analysis/outputs/results.csv"
# Get intermediate path without extension (will add .parquet for table type)
intermediate <- analysis$get_output_path("temp_data", type = "table", intermediate = TRUE)
intermediate$id # "temp_data"
#> [1] "temp_data"
intermediate$path # full path to temp_data.parquet in intermediate/
#> [1] "/private/var/folders/0t/mvk3x4hx0pl31l5lcl11krcc0000gn/T/RtmpYlxyhU/file129334821751a/analyses/my_analysis/intermediate/temp_data.parquet"
# Get output path with nested folders
output2 <- analysis$get_output_path("unique\\complex\\structure.rds")
output2$id # "unique/complex/structure"
#> [1] "unique\\complex\\structure"
output2$path
#> [1] "/private/var/folders/0t/mvk3x4hx0pl31l5lcl11krcc0000gn/T/RtmpYlxyhU/file129334821751a/analyses/my_analysis/outputs/unique/complex/structure.rds"
## ------------------------------------------------
## Method `PMAnalysis$run_in_slurm`
## ------------------------------------------------
if (FALSE) { # \dontrun{
analysis <- pm$get_analysis("my_analysis")
# With named arguments
slurm_run <- analysis$run_in_slurm(function(x, y) {
result <- compute_something(x, y)
return(result)
}, x = 10, y = 20)
# With positional arguments
slurm_run <- analysis$run_in_slurm(function(x) {
return(2 * x)
}, 10, result_id = "my_result")
# Check if done
slurm_run$is_done()
# Get results (with optional timeout to wait)
results <- slurm_run$get_results(timeout = 60)
} # }