| Title: | Dump 'R' Package Source, Documentation, and Vignettes into One File |
|---|---|
| Description: | Dump source code, documentation and vignettes of an 'R' package into a single file. Supports installed packages, tar.gz archives, and package source directories. If the package is not installed, only its source is automatically downloaded from CRAN for processing. The output is a single plain text file or a character vector, which is useful to ingest complete package documentation and source into a large language model (LLM) or pass it further to other tools, such as 'ragnar' <https://github.com/tidyverse/ragnar> to create a Retrieval-Augmented Generation (RAG) workflow. |
| Authors: | Egor Kotov [aut, cre, cph] (ORCID: <https://orcid.org/0000-0001-6690-5345>) |
| Maintainer: | Egor Kotov <[email protected]> |
| License: | MIT + file LICENSE |
| Version: | 0.3.0 |
| Built: | 2026-05-31 09:50:39 UTC |
| Source: | https://github.com/e-kotov/rdocdump |
This function extracts the R source code from a package. For installed
packages, it retrieves the package namespace and deparses all functions found
in the package. For package source directories or archives (non-installed
packages), it reads all .R files from the R directory and, optionally,
from the tests directory. Optionally, it can include roxygen2 documentation
from these files.
rdd_extract_code(
  pkg,
  file = NULL,
  include_tests = FALSE,
  include_roxygen = FALSE,
  force_fetch = FALSE,
  version = NULL,
  cache_path = getOption("rdocdump.cache_path"),
  keep_files = "none",
  repos = getOption("rdocdump.repos", getOption("repos"))
)
pkg |
A
|
file |
Optional. Save path for the output text file. If set, the
function will return the path to the file instead of the combined text.
Defaults to NULL. |
include_tests |
|
include_roxygen |
|
force_fetch |
|
version |
Optional. A |
cache_path |
A |
keep_files |
A
|
repos |
A |
For remote repositories, rdocdump uses pak for resolution. If pak
cannot find an R package at the root or the specified subdirectory, the
function will automatically fall back to downloading the full repository
and searching for the shallowest directory containing a DESCRIPTION file.
A single string containing the combined R source code (and, optionally, roxygen2 documentation) from the package.
# Extract only R source code (excluding roxygen2 documentation) from an
# installed package.
code <- rdd_extract_code("splines")
cat(substr(code, 1, 1000))

# Extract R source code including roxygen2 documentation from a package
# source directory.
# set cache directory for `rdocdump`
rdd_set_cache_path(paste0(tempdir(), "/rdocdump_cache"))

local({
  code_with_roxygen <- rdd_extract_code(
    "ini",
    include_roxygen = TRUE,
    force_fetch = TRUE,
    repos = c("CRAN" = "https://cran.r-project.org")
  )
  cat(substr(code_with_roxygen, 1, 1000))
})

# Extract R source code from a package source directory,
# including both test files and roxygen2 documentation.
local({
  code_with_tests <- rdd_extract_code(
    "ini",
    include_roxygen = TRUE,
    include_tests = TRUE,
    force_fetch = TRUE,
    repos = c("CRAN" = "https://cran.r-project.org")
  )
  cat(substr(code_with_tests, 1, 1000))
})

# clean cache directory
unlink(getOption("rdocdump.cache_path"), recursive = TRUE, force = TRUE)
Get Current rdocdump Repository Options
This function returns the current repository URLs used by rdocdump. The
default is set to the CRAN repository at "https://cloud.r-project.org". This
does not affect the repositories used by install.packages() in your current
R session and/or project. To set repository options, use
rdd_set_repos.
rdd_get_repos()
A character vector of repository URLs.
# Get current rdocdump repository options
rdd_get_repos()
Set rdocdump Cache Path in the Current R Session
This function sets the cache path used by rdocdump to store temporary
files (downloaded tar.gz archives and/or extracted directories) for the
current R session. The cache path is stored in the option
"rdocdump.cache_path", which can be checked with
getOption("rdocdump.cache_path"). The path is created if it does not
exist.
rdd_set_cache_path(path)
path |
A |
Invisibly returns the new cache path.
# set cache directory for `rdocdump`
rdd_set_cache_path(paste0(tempdir(), "/rdocdump_cache"))

# clean cache directory
unlink(getOption("rdocdump.cache_path"), recursive = TRUE)
Set rdocdump Repository Options
This function sets the package repository URLs used by rdocdump when
fetching package sources. May be useful for setting custom repositories or
mirrors. This does not affect the repositories used by install.packages()
in your current R session and/or project.
rdd_set_repos(repos)
repos |
A character vector of repository URLs. |
Invisibly returns the new repository URLs.
# Set rdocdump repository options
rdd_set_repos(c("CRAN" = "https://cloud.r-project.org"))
This function produces a single text output for an R package by processing its documentation (Rd files from the package source or the documentation from already installed packages), vignettes, and/or R source code.
rdd_to_txt(
  pkg,
  file = NULL,
  content = "all",
  force_fetch = FALSE,
  version = NULL,
  keep_files = "none",
  cache_path = getOption("rdocdump.cache_path"),
  repos = getOption("rdocdump.repos", getOption("repos"))
)
pkg |
A
|
file |
Optional. Save path for the output text file. If set, the
function will return the path to the file instead of the combined text.
Defaults to NULL. |
content |
A character vector specifying which components to include in the output. Possible values are:
You can specify multiple options (e.g., |
force_fetch |
|
version |
Optional. A |
keep_files |
A
|
cache_path |
A |
repos |
A |
A single string containing the combined package documentation,
vignettes, and/or code as specified by the content argument. If the
file argument is set, returns the path to the file.
# Extract documentation for the built-in `splines` package (both docs and
# vignettes).
docs <- rdd_to_txt("splines")
cat(substr(docs, 1, 500))

## Not run:
# Extract from GitHub repository
docs <- rdd_to_txt("r-lib/rlang")

# Extract specific version from GitHub (replace <tag> with a release tag)
docs <- rdd_to_txt("r-lib/rlang@<tag>")

# Extract from GitLab
docs <- rdd_to_txt("gitlab::user/repo")

# Auto-discovery of packages in subdirectories (e.g., if repo root is not the pkg)
docs <- rdd_to_txt("ipeaGIT/r5r")

# Manual subdirectory specification (useful for disambiguation)
docs <- rdd_to_txt("ipeaGIT/r5r/r-package")
## End(Not run)

## Not run:
# set cache directory for `rdocdump`
rdd_set_cache_path(paste0(tempdir(), "/rdocdump_cache"))

# Extract only documentation for rJavaEnv by downloading its source from CRAN
docs <- rdd_to_txt(
  "rJavaEnv",
  force_fetch = TRUE,
  content = "docs",
  repos = c("CRAN" = "https://cran.r-project.org")
)
lines <- unlist(strsplit(docs, "\n"))
# Print the first 3 lines
cat(head(lines, 3), sep = "\n")
# Print the last 3 lines
cat(tail(lines, 3), sep = "\n")

# clean cache directory
unlink(getOption("rdocdump.cache_path"), recursive = TRUE, force = TRUE)
## End(Not run)