director-exists.R

              
            
              #' Determine whether a resource exists relative to a director object.
#'
#' @param resource character. The name of the resource.
#' @param helper logical. Whether or not to check helper existence
#'   in an idempotent resource. The default is \code{FALSE}.
#' @return \code{TRUE} or \code{FALSE} according as it does or does not
#'   exist.
#' @examples 
#' \dontrun{
#'   # Imagine we have a file structure:
#'   #   - foo
#'   #     - one
#'   #       - one.R
#'   #       - helper.R
#'   #     - two.R
#'   #
#'   # Then the bellow will return \code{TRUE}, \code{FALSE}, and \code{TRUE},
#'   # respectively. Note that the \code{"helper.R"} file is not considered a
#'   # resource by the director as \code{"one.R"} shares its name with its
#'   # parent directory and is considered the accessible resource.
#'
#'   d <- director('foo')
#'   d$exists('one')
#'   d$exists('one/helper')
#'   d$exists('two')
#' }
director_exists <- function(resource, helper = FALSE) {
            
                "Determine whether or not a resource exists in this director structure."

            

We want to handle inputs like “foo.R” gracefully, since there is no reason to make life painful for the user.

                resource <- strip_r_extension(resource)

            

Since R is a language that does not have good support for mixins and hierarchical structure, we will borrow ideas from node.js, which has a similar problem. If a file becomes too complex, we should be able to split it up into multiple pieces – but only “export” a single thing. You can do this by turning moving your “file.R” to the directory “file” (note that the name must match) and placing your old file in “file/file.R”. Any additional files in the “file” directory, like “file/helper_function.R” or “file/constants.R” will not be detectable to the director object. This allows us to follow the important developer principles of Don't Repeat Yourself and maintaining modularity without polluting what our director sees.

Idempotence only applies to files in the same directory. If you have files “foo/bar/bar.R”, “foo/bar/baz.R”, and “foo/bar/bux/bux.R”, then “foo/bar/bux” is considered another director resource. This allows for hierarchical resource structures that can still maintain their respective helpers (although it may take a little bit of getting used to).

                if (basename(dirname(resource)) == basename(resource)) {
    resource <- dirname(resource)
  }

            

It is always preferable to wrap logical conditions with isTRUE. We will receive a warning if length(helper) != 1 and we will just straight up error if helper is not a logical vector!

                if (isTRUE(helper))  {
            

absolute is a method on the director class.

                  extensionless_exists(self$absolute(resource))
  } else {
            

If the director find method returns an exact match for this resource (i.e., find("foo") is "foo"), the resource exists and we're in business!

                  length(self$find(resource, method = "exact", by_mtime = FALSE)) == 1
  }
}

            

director-filename.R

              
            
              #' Convert a resource to a filename.
#'
#' @param name character. The resource name to convert to a full path.
#' @param absolute character. Whether or not to return the absolute path
#'    (i.e., prepended by the director root). The default is \code{FALSE}.
#' @param check.exists logical. Whether or not to check if the file exists.
#'    The default is \code{TRUE}. This argument should be set to false if the
#'    we are certain the file exists and can skip the check.
#' @param helper logical. Whether or not to handle helper files instead of
#'   resources. The default is \code{FALSE}.
#' @param enclosing logical. If \code{TRUE} and \code{name} refers to an
#'   idempotent resource the directory name will be returned instead of the
#'   .R file.
#' @return the absolute path, relative to the director root if
#'    \code{absolute = FALSE} and an absolute path if \code{absolute = TRUE}.
director_filename <- function(name, absolute = FALSE, check.exists = TRUE,
                              helper = FALSE, enclosing = FALSE) {
            
                "Convert a resource name to a file name."

  if (isTRUE(check.exists) && !self$exists(name, helper = isTRUE(helper))) {
    stop("Cannot convert resource ", crayon::red(name), " to full file path, ",
         "as no such resource exists.")
  }

  if (isTRUE(file.info(file.path(self$root(), name))$isdir)) {
    idempotent <- TRUE
    file <- complete_extension(file.path(name, basename(name)), self$root())
  } else {
    idempotent <- FALSE
    file <- complete_extension(name, self$root())
  }

  if (isTRUE(absolute)) {
    file <- file.path(self$root(), file)
  }
  
  if (isTRUE(enclosing) && idempotent) {
    dirname(file) 
  } else {
    file
  }
}
            

director-find.R

              
            
              #' Find resources within a director project.
#'
#' @note
#' The available search methods are:
#'
#' \describe{
#'   \item{wildcard}{Similar to Sublime or vim's ctrl + P, this method
#'     of search will look for consecutive appearances of characters.
#'     For example, if we have a resource \code{"some_resource"}, then
#'     looking for \code{"so"}, \code{"sre"} or even \code{"smsrc"} will
#'     return a match, since those characters occur consecutively in the
#'     original resource name.}
#'   \item{partial}{This method will try to find a substring that
#'     matches the resource name. For example, if we have
#'     \code{"dir/some_resource"}, then looking for \code{"dir/some"} will
#'     return a match.}
#'   \item{exact}{The exact name of the resource. In this mode, either a 
#'     single string (the resource name itself) or \code{character(0)} will
#'     be returned.}
#' }
#'
#' @param pattern character. The resources to search for. The default is
#'    \code{""}, which will list all resources within the \code{base}.
#' @param method character. The search method. The available options
#'    are \code{"wildcard"}, code{"substring"}, or \code{"exact"}. See the function
#'    description for the full explanation of these methods. The default is
#'    \code{"wildcard"}.
#' @param base character. A prefix under which to look for. For example,
#'    if \code{base = "subdir"}, then only resources under the \code{"subdir"}
#'    directory (relative to the director root) will be returned. The default is
#'    \code{""}, which will list all resources within the director root.
#' @param by_mtime logical. Whether or not to sort results by modification time
#'    in descending order. The default is \code{TRUE}, so that the first result
#'    is the most recently modified resource.
#' @return a character vector of matched resources.
#' @examples
#' \dontrun{
#'   # Imagine we have a file structure:
#'   #   - foo
#'   #     - one
#'   #       - one.R
#'   #       - helper.R
#'   #     - two.R
#'   #
#'   # Then the bellow will return \code{"foo/one"}, \code{"two"}, and \code{""},
#'   # respectively. Note that the \code{"helper.R"} file is not considered a
#'   # resource by the director as \code{"one.R"} shares its name with its
#'   # parent directory and is considered the accessible resource.
#'
#'   d <- director('foo')
#'   d$find('fone', method = 'wildcard') # "foo/one"
#'   # Under the hood, this looks for the regex .*f.*o.*n.*e.*
#'   d$find('wo',   method = 'partial')  # "two"
#'   d$find('none', method = 'exact')    # ""
#' }
director_find <- function(pattern = "", method = "wildcard", base = "", by_mtime = TRUE) {
            
                "Look for resources by wildcard, partial, or exact matches."

  enforce_type(base, "character", "director$find")

            

If multiple base paths are provided, union together the results of calling find on each base individually.

                if (length(base) > 1) {
            

The R Inferno recommends pre-allocating vectors for performance benefits.

                  all <- vector('list', length(base))
    for (i in seq_along(all)) {
            

Although we need to do this instead of using lapply so we can take advantage of Recall.

                    all[[i]] <- Recall(pattern = pattern, method = method,
                         base = base[i], by_mtime = FALSE)
      # TODO: (RK) Re-sort by modification time: https://github.com/robertzk/director/issues/19
    }
            

Using c with recursive = TRUE will collapse the list of character vectors into a single character vector.

                  c(all, recursive = TRUE)
  } else {
            

This function is already getting too complex, so delegate the job of actually finding resources that match this pattern to a helper function.

The keyword self is an R6 class keyword
that represents the environment containing this R6 object. It is similar to other languages' this keyword.

                  find_(self, pattern, method, base, by_mtime)
  }
}

find_ <- function(director, pattern, method, base, by_mtime) {
            

Listing all the files below will be slow if we need to look for an exact match, so we implement a separate helper for finding exact matches.

                if (identical(tolower(method), "exact")) {
    return(exact_find(director, pattern, base))
  }

  all_files <- list.files(file.path(director$root(), base),
                          pattern = "\\.[rR]$", recursive = TRUE)
  all_files <- strip_r_extension(all_files)

            

We will use the apply_pattern helper below, so all patterns should be search_pattern S3 objects.

                if (!is.search_pattern(pattern)) {
    pattern <- search_pattern(pattern, method)
  }

            

First, remove helper files from consideration.

                all_files <- apply_pattern(search_pattern("", "idempotence"), all_files)
  
            

Now filter to those files that satisfy the pattern. For example, if we used pattern = search_pattern("foo", "partial"), we would find files that contain “foo” as a substring.

                resources <- apply_pattern(pattern, all_files)

  if (nzchar(base)) resources <- file.path(base, resources)
  sort_by_mtime(resources, by_mtime, director)
}

sort_by_mtime <- function(files, by_mtime, director) {
  if (isTRUE(by_mtime)) {
    modified_time <- function(file) {
            

director$filename(..., absolute = TRUE) will convert the resource name into a full file path.

                    file.info(director$filename(file, absolute = TRUE,
                                  check.exists = FALSE))$mtime
    }

    by_modification_time <- vapply(files, modified_time, numeric(1))
    files[order(by_modification_time, decreasing = TRUE)]
  } else {
    files
  }
}

exact_find <- function(director, pattern, base) {
  if (nzchar(base)) pattern <- file.path(base, pattern)

            

A resource “foo” can correspond to either “foo.r”, “foo.R”, “foo/foo.r”, or “foo/foo.R”.

                candidates <- as.character(t(outer(
    c(pattern, file.path(pattern, basename(pattern))),
    c(".r", ".R"), paste0
  )))

            

However, if it is “foo.r” or “foo.R”, we must ensure it is not the helper of an idempotent resource. If the resource is prefixed by, say, “bar”, we check “bar/bar.r” and “bar/bar.R” in addition to “bar/foo.r” and “bar/foo.R”.

                to_idempotent <- function(f) {
    file.path(dirname(f), paste0(basename(dirname(f)), c(".r", ".R")))
  }
  absolute_candidates <- file.path(director$root(), candidates)
  absolute_candidates <- c(absolute_candidates,
    sapply(absolute_candidates[1], to_idempotent))

            

Batching the exists check is faster because there is only one system call.

                exists <- file.exists(absolute_candidates)

            

If “foo.r” or “foo.R” exist but they are helpers, do not treat them as resources.

                if (any(exists[1:2]) && any(exists[5:6])) character(0) 
            

Otherwise, if it is a proper resource, select the first match.

                else if (any(exists[1:4])) pattern
  else character(0)
}

            

director-initialize.R

              
            
              #' Initialize a director object.
#'
#' @param root character. The root directory for the director.
#' @param project_name character. The name of the director project. Useful for
#'    error messages. For example, if a resource is not found, an error message
#'    to the effect of "no resource foo found in your \code{project_name}
#'    project" will be displayed.
#' @return a \code{director} reference class object.
#' @examples
#' \dontrun{
#' director(tempdir())
#' director(tempdir(), "my project") # Error messages on using the director's
#'                                   # methods will now usually result in
#'                                   # the ending "in project 'my project'".
#' }
initialize <- function(root, project_name = '') {
            

Reference class objects are sometimes initialized on package install, but no arguments are passed! We let it through to avoid installation problems.

                if (missing(root)) return() 

  enforce_type(project_name, "character", "director$new")

  if (length(project_name) != 1) {
    stop("Project name for ", crayon::blue("director$new"), " call must ",
         "be a scalar character, but has length ",
         crayon::red(as.character(length(project_name))), ".")
  }

  if (!file.exists(root)) {
    stop("Cannot create a director for ", crayon::red(root), " as that directory ",
          "does not exist.")
  }

  if (!file.info(root)$isdir) {
    stop("Cannot create a director for ", crayon::red(root), " as that is a file ",
          "and not a directory.")
  }

  # Set R6 fields.
  self$.dependency_nesting_level <<- 0L
  self$.root             <<- normalizePath(root)
  # TODO: (RK) Customize location of the registry: https://github.com/robertzk/director/issues/20
  self$.registry         <<- registry(file.path(self$.root, '.registry'))
  self$.project_name     <<- project_name
  self$dependency_stack  <<- shtack$new()

  self$.resource_cache   <<- list()
  self$.parsers          <<- list()
  self$.preprocessors    <<- list()
  self$.cached_resources <<- list()
  self$cache             <<- simple_cache()

            

We need a unique identifier for each director object, so we can keep track of state separately in the active_resource helper.

                .director_env$count <- (.director_env$count %||% 0) + 1
  self$.id <<- .director_env$count
}

            

director-package.R

              
            
              #' Management and Tracking of Files in R Projects.
#'
#' The director package is responsible for managing and loading resources in
#' some fixed directory structure. It has support for tracking changes between
#' consecutive loads of resources (so that we can tell if a script was modified
#' since we last ran it) and defining parsers that allow us to generalize from
#' the pernicious simple linear execution that is common to R.
#'
#' @docType package
#' @name director
#' @import digest crayon R6 methods
NULL

            

Used to keep track of what directors are currently active.

              .director_env <- new.env(parent = emptyenv())
            

director-parsers.R

              
            
              #' Register a resource parser.
#'
#' @param path character. The prefix to look for in the director.
#' @param parser function. 
#' @param overwrite logical. If \code{TRUE}, \code{register_parser} will overwrite
#'   the route instead of erroring if the path already has a registered
#'   parser. The default is \code{FALSE}.
#' @param cache logical. Whether or not to cache resources processed with this
#'   parser. Cached resources will not be re-parsed if their dependencies have
#'   not been modified. This distinction is important, as most resources are
#'   factory resources (the object they generate should not be shared across
#'   the entire project; instead, a copy should be made). The default is \code{FALSE}.
#' @examples
#' \dontrun{
#'   d <- director('some/project')
#'   d$register_parser('models', function() { print("I am a ", resource, ", a model!") })
#'   r <- d$resource('models/some_model.R')
#'   r$value() # Will print: I am a models/some_model, a model!
#' }
register_parser <- function(path, parser = function() { }, overwrite = FALSE, cache = FALSE) {
  enforce_type(path,   "character", "director$register_parser")
  enforce_type(parser, "function",  "director$register_parser")

  if (length(path) != 1) {
    stop("A parser must be registered to a path that is a scalar character ",
         "but instead I got a character vector of length",
          crayon::red(as.character(length(path))), ".")
  }

  if (length(formals(parser)) != 0) {
    # TODO: (RK) Require correct formals specification: https://github.com/robertzk/director/issues/21
    formals(parser) <- NULL
  }
  
  if (is.element(paste0("/", path), names(self$.parsers)) && !isTRUE(overwrite)) {
    stop("Parser already registered for path ", crayon::red(path))
  }

  if (isTRUE(cache)) {
    self$.cached_resources <<- c(self$.cached_resources, path)
  }

  # Prefix "/" for empty paths.
  self$.parsers[[paste0("/", path)]] <<- parser

            

We store each parser function by path in descending order by length. This will favor paths that are more fully specified. For example, if we have a parser for "models" and a parser for "models/ensembles", the latter has a longer length and will be preferred when selecting the function used for parsing resources in the "models/ensembles" directory.

                self$.parsers         <<- self$.parsers[names(self$.parsers)[rev(order(sapply(names(self$.parsers), nchar)))]]

  check_if_parser_and_preprocessor_are_identical(self, path)
  invisible(TRUE)
}

            

director-preprocessors.R

              
            
              #' Register a resource preprocessor
#'
#' @param path character. The prefix to look for in the director.
#' @param preprocessor function. 
#' @param overwrite logical. If \code{TRUE}, \code{register_preprocessor} will overwrite
#'   the route instead of erroring if the path already has a registered
#'   preprocessor. The default is \code{FALSE}.
#' @examples
#' \dontrun{
#'   d <- director("some/project")
#'   d$register_preprocessor('models', function() { print("I am a ", resource, ", a model!") })
#'   r <- d$resource("models/some_model.R")
#'   r$value() # Will print: I am a models/some_model, a model!
#' }
register_preprocessor <- function(path, preprocessor, overwrite = FALSE) {
  enforce_type(path,         "character", "director$register_preprocessor")
  enforce_type(preprocessor, "function",  "director$register_preprocessor")

  if (length(path) != 1) {
    stop("A preprocessor must be registered to a path that is a scalar character ",
         "but instead I got a character vector of length",
          crayon::red(as.character(length(path))), ".")
  }

  if (length(formals(preprocessor)) != 0) {
    # TODO: (RK) Require correct formals specification: https://github.com/robertzk/director/issues/21
    formals(preprocessor) <- NULL
  }
  
  if (is.element(paste0("/", path), names(self$.preprocessors)) && !isTRUE(overwrite)) {
    stop("Preprocessor already registered for path ", crayon::red(path), ".")
  }

  # Prefix "/" for empty paths.
  self$.preprocessors[[paste0("/", path)]] <<- preprocessor

            

We store each preprocessor function by path in descending order by length. This will favor paths that are more fully specified. For example, if we have a preprocessor for "models" and a preprocessor for "models/ensembles", ## the latter has a longer length and will be preferred when selecting the function used for parsing resources in the "models/ensembles" directory.

                self$.preprocessors         <<- self$.preprocessors[
    names(self$.preprocessors)[rev(order(sapply(names(self$.preprocessors), nchar)))]]

  check_if_parser_and_preprocessor_are_identical(self, path)
  invisible(TRUE)
}


#' Whether there exists a preprocessor for a resource.
#'
#' @param resource_path character. The resource name.
#' @return \code{TRUE} or \code{FALSE} depending on whether there
#'   is a preprocessor for this resource.
has_preprocessor <- function(resource_path) {
  !is.null(self$match_preprocessor(resource_path))
}

            

director-resource.R

              
            
              #' Fetch a resource relative to a director object.
#'
#' Resources are R scripts that optionally have a "parser" attached
#' which takes the result of executing the file, including all of its local
#' variables, and does some additional computation. This is useful if,
#' for example, you are trying to define a standard format for creating a
#' reference class object by specifying some inputs, but want to make it
#' easy to provide those inputs by users.
#' 
#' This method will return a \code{directorResource} object that represents
#' that particular R script. A resource can have a \code{\link[=register_preprocessor]{preprocessor}}
#' and a \code{link[=register_parser]{parser}} attached to it.
#'
#' The former determines how to source the R file. For example, if you need
#' to inject additional variables prior to sourcing it, you can do so
#' from the preprocessor.
#' 
#' The parser determines what to do with the R file after sourcing it.
#' It can tell what the dependencies of the file are (i.e., what other
#' resources were used when sourcing it), and whether or not it was modified
#' (i.e., whether the R file or any of its dependencies were modified).
#' 
#' Together, a preprocessor, parser, and source file compose a resource.
#'
#' @param name character. The name of the resource (i.e. R script) relative
#'   to the root of the director object.
#' @param provides list or environment. A list or environment of values to provide
#'   to the resource. The default is nothing, i.e., \code{list()}. Note that
#'   \code{provides} will be coerced to an environment, and its parent 
#'   environment will be set to \code{parent.env(topenv())} to prevent
#'   access to global variables (and encourage modularity and lack of side
#'   effects. There should always be a way to write your code without them).
#' @param body logical. Whether or not to fetch the body of the resource.
#' @param soft logical. Whether or not to modify the cache to reflect
#'   the resource modification time and other details.
#' @param tracking logical. Whether or not to perform modification tracking
#'   by pushing accessed resources to the director's stack.
#' @param helper logical. If \code{TRUE}, allow processing of helper files.
# TODO: (RK) Explain idempotence and helpers more: https://github.com/robertzk/director/issues/23
#'   If a file shares its name with the parent directory (e.g., "foo"
#'   and "foo/foo.R"), it is called an idempotent resource. Any other files
#'   in the same directory as the idempotence resource, besides the file
#'   itself, are called helper files, and are usually invisible to the
#'   director object (e.g., "foo/other.R" if "foo/foo.R" exists).
#'
#'   If \code{helper = TRUE}, these will temporarily be treated as a
#'   resource so that we can track whether they were modified and re-use
#'   other \code{directorResource} features. By default, \code{helper = FALSE}.
#' @return A \code{\link{directorResource}} object.
resource <- function(name, ..., defining_environment. = parent.frame()) {
  force(defining_environment.)
  resource <- director_resource(self, resource_name(name), defining_environment.)
  process_resource(resource, ...)
}

            

director.R

              
            
              #' @include director-exists.R director-filename.R director-find.R
#' @include director-initialize.R director-parsers.R
#' @include director-preprocessors.R director-resource.R
#' @include utils.R
NULL

# NOTE: This file is prepended with "zzz_" to ensure other files are parsed
# first. This is because the DESCRIPTION file's "Collate" directive is obsolete.

#' A director is an \link[=https://github.com/wch/R6]{R6 class} responsible for
#' a collection of file-traversal related responsibilities.
#'
#' @details Throughout, a "resource" refers to an R script
#'   with possible helper functions. A resource with helpers is identified
#'   by the following heuristic: if a filename sans extension is identical
#'   to its directory name, it is considered the primary accessible "resource"
#'   with any accompanying helpers (files in the same directory) natively
#'   invisible to the directory. For example, a file called \code{"foo.R"}
#'   residing in directory \code{"foo"} will be accessible from the director
#'   object using \code{director_object$resource('foo')}, but any other R scripts
#'   in the \code{"foo"} directory will not be visible to the director object.
#' 
#'   With this definition of resource, a director manages all of the following
#'   relative to a root directory. 
#'
#' \describe{
#'   \item{"Loading"}{Grabbing resources relative to the root directory
#'      using the \code{resource} method. This also provides information
#'      about the last time the resource was grabbed (modification time,
#'      previous body).}
#'   \item{"Tracking"}{Besides tracking information about the loaded resource
#'      (and any previous versions of the loaded resource), the director also
#'      maintains a stack of resources that have been loaded so far. This allows
#'      one to, for example, force clear the stack, execute some code, and have
#'      a list of resources that were relevant to the computation. The current
#'      stack is available with \code{director_object$stack(all = TRUE)}, which
#'      will also clear it.}
#'   \item{"Parsers"}{Some resources are not intended to be merely executed,
#'      but also parsed. For example, if we define several functions in an
#'      R script, we have access to those functions if we had sourced it
#'      with \code{base::source(script, local = some_environment)} as they
#'      are now present in \code{some_environment}. A parser is a wrapper
#'      around loading of resources that allows one to do some further
#'      computation with this information. For example, we may define
#'      a \code{read} and \code{write} function, but parse this information
#'      into some IO object when fetching the resource. Thus, a resource
#'      can function as inputs to some parser that produces some final
#'      resource object. Parsers can be defined for full directories or
#'      specific files.}
#' }
#'
#' @docType class
#' @rdname director
#' @format NULL
director_ <- R6::R6Class("director",
  portable = TRUE,                        
  private = list(
  ),
  public = list(                    
    .root           = NULL, # character
    .project_name   = NULL, # character
    .resource_cache = list(), # list
    .registry       = NULL, # registry
    .dependency_nesting_level = 0, # integer
    .parsers        = list(), # list
    .preprocessors  = list(), # list
    .cached_resources = list(), # character
    .id             = NULL, # integer
    # Members
    dependency_stack = NULL, # stack
    cache            = NULL,

    # Methods
    initialize = initialize,
    exists     = director_exists,
    resource   = resource,
    #virtual_resource = virtual_resource,
    register_parser = register_parser,
    register_preprocessor = register_preprocessor,
    has_preprocessor = has_preprocessor,
    find       = director_find,

    match_preprocessor = function(resource_name) {
      Find(function(x) substring(resource_name, 1, nchar(x)) == x,
           substring(names(self$.preprocessors), 2))
    },
    match_parser = function(resource_name) {
      Find(function(x) substring(resource_name, 1, nchar(x)) == x,
           substring(names(self$.parsers), 2))
    },
    preprocessor = function(x) self$.preprocessors[[paste0("/", x)]],
    parser = function(x) self$.parsers[[paste0("/", x)]],
    cached_resources = function() self$.cached_resources,

    cache_get = function(k) { self$cache$get(k) },
    cache_set = function(k, v) { self$cache$set(k, v) },
    cache_exists = function(k) { self$cache$exists(k) },

    tracking_dependencies = function() { self$.dependency_nesting_level > 0L },
    clear_resource_stack = function() { if (self$.dependency_nesting_level == 0) self$dependency_stack$clear() },
    increment_nesting_level = function() { self$.dependency_nesting_level <<- self$.dependency_nesting_level + 1L },
    decrement_nesting_level = function() { self$.dependency_nesting_level <<- self$.dependency_nesting_level - 1L },
    nesting_level = function() { self$.dependency_nesting_level },
    push_dependency = function(dependency) {
      self$dependency_stack$push(dependency)
    },
    peek_dependency_stack = function(...) { self$dependency_stack$peek(...) },
    pop_dependency_stack = function() { self$dependency_stack$pop() },
    empty_dependency_stack = function() { self$dependency_stack$empty() },

    root         = function() { self$.root },
    project_name = function() { self$.project_name },
    absolute   = function(x) { file.path(self$root(), x) },
    show       = function() {
      cat(sep = '', "Director object",
          if (isTRUE(nzchar(self$.root))) paste0(" monitoring ", sQuote(self$.root),
            if (isTRUE(nzchar(self$.project_name))) paste(" for", self$.project_name, "project")),
          ".\n")
    },
    filename  = director_filename
  )
)

director <- structure(
  function(...) { director_$new(...) },
  class = "director_"
)

`$.director_` <- function(...) {
  stopifnot(identical(..2, "new"))
  ..1
}

#' @export
print.director <- function(x, ...) {
  x$show()
}

#' @docType function
#' @name director
#' @export
NULL

#' Whether or not an object is a director.
#' @param x ANY.
#' @export
is.director <- function(x) is(x, 'director')

#' If the parser and preprocessor for a path is the same, the user has probably made a mistake.
#'
#' @param director director. The director object.
#' @param path character. The path to check for whether the preprocessor and
#'   parser are identical.
#'
#' @return Nothing, but issue a warning in red crayon if they are identical,
#'   since it likely means the user forgot to specify a parser.
check_if_parser_and_preprocessor_are_identical <- function(director, path) {
  has_same_body <- function(fn1, fn2) {
    is.function(fn1) && is.function(fn2) && 
      isTRUE(all.equal(body(fn1), body(fn2)))
  }
  if (has_same_body(director$.parsers[[paste0("/", path)]],
                    director$.preprocessors[[paste0("/", path)]])) {
    warning(crayon::red("The path at ", sQuote(path), " has the same",
                        "preprocessor and parser -- are you sure you",
                        "included a parser?"))
  }
}

            

process_resource.R

              
            
              # Minimalist persistent global state.
director_state <- new.env(parent = emptyenv())

director_resource <- function(director, name, defining_environment) {
  structure(list(
    director = director,
    name = name,
    defining_environment = defining_environment
  ), class = "director_resource")
}

# Construct a resource-compiling tower.
process_resource <- function(resource, ...) {
  enforce_type(resource, "director_resource", "process_resource")

  tower(
    virtual_check        %>>%
    modification_tracker %>>%
    dependency_tracker   %>>% 
    caching_layer        %>>%
    preprocessor         %>>%
    parser               
  )(active_resource(resource), ...)
}

# An active resource is just a list that holds a resource,
# but also an "injects" environment and "state", which is
# like the equivalent of the Haskell IO monad.
active_resource <- function(resource) {
  structure(class = "active_resource", list(
    resource = resource,
    injects  = new.env(parent = topenv(resource$defining_environment)),
    state    = generate_state(resource)
  ))
}

# Generate the persistent global state for a resource.
generate_state <- function(resource) {
  # TODO: (RK) Issue warnings when directors on the same directory with the
  # same project name are instantiated, as they will conflict with each
  # others' global state: https://github.com/robertzk/director/issues/25
  director_key <- (function(director) {
    # digest::digest(list(director$root(), director$project_name()))
    paste0("director", director$.id)
  })(resource$director)

            

We do not need inherits = FALSE because the parent environment is the empty environment.

                if (!base::exists(director_key, envir = director_state)) {
    director_state[[director_key]] <- new.env(parent = emptyenv())
  }
  state <- director_state[[director_key]]

  if (!base::exists(resource$name, envir = state)) {
    state[[resource$name]] <- new.env(parent = emptyenv())
  }
  state[[resource$name]]
}

            

registry.R

              
            
              #' A persistent on-disk cache of R objects associated with a directory.
#'
#' Having a registry attached to a project is very helpful for maintaining
#' state across R sessions without encoding everything in the unreliable
#' \code{.RData} file.
#'
#' To create a registry, simply write \code{r <- registry("some/directory")}.
#' You can then use \code{r$set('some/key', some_value)} and
#' \code{r$get('some/key')} to set and retrieve values (arbitrary R objects).
#'
#' @docType class
#' @name registry
#' @rdname registry
#' @export
#' @exportClass registry
#' @examples
#' \dontrun{
#'   r <- registry('some/dir') # Create "some/dir" and make a registry there.
#'   r$set('some/key', value <- list(1,2,3))
#'   stopifnot(r$get('some/key'), value)
#' }
registry <- methods::setRefClass('registry',
  fields = list(.root = 'character'),
  methods = list(
    # Initialize a registry. A registry is responsible for maintaining
    # an on-disk cache of R objects (configuration, temporary storage,
    # associated with a directory).
    #
    # @param root character. The root of the registry. If it does not exist,
    #    it (and any not yet existent parent directories) will be created.
    # @param
    # @examples
    # registry(dirname(tempfile()))
    initialize = function(root = NULL) {
            
                    "Initialize a registry."

      # Reference class objects are sometimes initialized on package install, but
      # no arguments are passed! We let it through to avoid installation problems.
      if (is.null(root)) return(NULL)

      enforce_type(root, "character", "registry$new")
      stopifnot(length(root) == 1)

      if (!file.exists(root)) {
        dir.create(root, showWarnings = FALSE, recursive = TRUE)
      }

      if (!file.info(root)$isdir) {
        stop("A registry's root must be a directory, not a file (you provided ",
             crayon::red(root), ")")
      }

      .root <<- normalizePath(root)
    },

    # Retrieve an object from the registry.
    #
    # The key used to locate the object will be the directory/file
    # structure in the registry's root.
    #
    # @param key character. The path relative to the registry's root.
    # @param ... additional keys which will be joined together with
    #    code \code{base::file.path}. Thus, if you \code{get('a','b')},
    #    you are asking for key \code{'a/b'}.
    # @param soft logical. Whether or not to error if the registry key
    #    requested does not exist. If \code{soft = TRUE} and the latter
    #    condition holds, \code{NULL} will be returned instead. The
    #    default is \code{soft = TRUE}.
    # @return an R object stored in the registry under the given \code{key}.
    #    This will be serialized as an RDS file relative to the root of
    #    the registry. If \code{soft = TRUE}, \code{NULL} may be returned
    #    if the \code{key} does not point to a valid registry key.
    # @examples
    # \dontrun{
    #   r <- registry('some/dir')
    #   r$get('foo') # gets key "foo"
    #   r$get('foo', 'bar', 'baz') # get key "foo/bar/baz"
    # }
    get = function(key, ..., soft = TRUE) {
      enforce_type(key, "character", "registry$get")

      if (length(rest <- c(...)) != 0) {
        enforce_type(rest, "character", "registry$get", "...")
            

If we called $get('a', 'b', 'c'), squish it into “a/b/c”.

                      key <- do.call('file.path', as.list(c(key, rest)))
      }

      key <- .sanitize_key(key, read = TRUE, soft = soft)
            

Recall that parentheses around an R expression drop invisibility.

                    if (!is.null(key) && file.exists(key)) (readRDS(key))
    },

    # Place an object in the registry.
    #
    # The key used to locate the object will be the directory/file
    # structure in the registry's root. The object is serialized using
    # \code{saveRDS}.
    #
    # @param key character. The path relative to the registry's root.
    # @param value ANY. Some R object to serialize into the registry.
    # @examples
    # r <- registry(dirname(tempfile))
    # r$set("example/key", "example_value")
    # # The directory "example" was created under the registry"s root
    # # with a filename "key" that holds the string "example_value".
    # stopifnot(registry$get("example", "key") == "example_value")
    set = function(key, value) {
      # TODO: (RK) Warn on overwrite?

      key <- .sanitize_key(key, read = FALSE)
      dir.create(dirname(key), FALSE, TRUE)
      error_handler <- function(e) {
        stop("Failed to save registry key ", sQuote(crayon::red(key)),
             " in registry with root ", sQuote(crayon::blue(.root)),
             " because: \n\n", crayon::yellow(e$message), "\n\n")
      }
      tryCatch(error = error_handler, saveRDS(value, key))
    },

    # Sanitize a registry key to ensure it can point to a filename.
    #
    # @param key character. The registry key to sanitize. Note that
    #    this will determine an actual file structure, so if the
    #    the key \code{'foo/bar'} is used, an actual directory
    #    \code{'foo'} will be created inside the registry's root.
    # @param read logical. Whether a read or write operation is being
    #    performed on the registry. In the former scenario, this method
    #    tests that an associated filename exists. In case of a write
    #    operation, the requisite directories are created. Thus,
    #    setting \code{"nonexistent/file"} will error if \code{read = TRUE},
    #    but create the \code{"nonexistent"} directory if \code{tread = FALSE}.
    #    The default is \code{read = TRUE}.
    # @param soft logical. Whether or not to error if \code{read = TRUE} and
    #    the filename implied by \code{key} does not exist. If you try to
    #    create a key one of whose parent directories is actually a file
    #    while \code{read = FALSE}, it will still error however. For example,
    #    sanitizing key \code{"foo/bar/baz"} when \code{"foo/bar"} is a file
    #    with \code{read = FALSE} results in an error. The default is
    #    \code{TRUE}.
    # @return a character representing the sanitized key.
    # @examples
    # \dontrun{
    #   r <- registry(dirname(tempfile()))
    #   r$.sanitize_key('nonexistent/file') # This will complain
    #   r$.sanitize_key('nonexistent/file', read = FALSE)
    #   # This will create the `nonexistent` directory in the registry root.
    # }
    .sanitize_key = function(key, read = TRUE, soft = TRUE) {
      enforce_type(key, "character", "registry$.sanitize_key")

      if (length(key) == 0) return(character(0))
      if (length(key) > 1) return(vapply(key, .self$.sanitize_key, character(1)))

            

Using consecutive dots would allow us to traverse into parent directories, which is probably a security risk.

                    if (grepl("..", key, fixed = TRUE))
        stop("Registry keys cannot contain two consecutive dots (the ",
             "key ", sQuote(crayon::red(key)), " was given in ",
             "registry with root ", sQuote(crayon::blue(.root)), ".")

      if (isTRUE(read)) {
        if (!file.exists(filename <- file.path(.root, key))) {
          if (soft) NULL
          else stop("There is no registry item with key ",
                    sQuote(crayon::red(key)), " in registry with root ",
                    sQuote(crayon::blue(.root)))
        } else if (file.info(filename)$isdir) {
          stop("There is no registry item with key ", sQuote(crayon::red(key)),
               " in registry with root ", sQuote(crayon::blue(.root)),
               " because this key points to a directory.")
        } else filename
      } else {
        warning_handler <- function(e) {
          if (!is(e, "warning") || grepl("reason 'Not a directory'", e$message, fixed = TRUE))
            stop("Cannot create registry key ", sQuote(crayon::red(key)),
                 " in registry with root ", sQuote(crayon::blue(.root)),
                 " because: \n\n", crayon::yellow(e$message), "\n\n")
        }
            

This statement is only true for top-level directories like “.” or “/”.

                      if ((dir <- dirname(key)) != key) {
          if (file.exists(d <- file.path(.root, dir)) && !file.info(d)$isdir) {
            warning_handler(list(message =
              paste(sQuote(d), "is a file but must be a directory")))
          }
          tryCatch(warning = warning_handler,
            dir.create(file.path(.root, dir), showWarnings = FALSE, recursive = TRUE))
        }
        file.path(.root, key)
      }
    } # end .sanitize_key method
  )
)

            

In order to use both registry() and registry$new() when loading the package externally, we must export the registry function in addition to the reference class.

              #' @docType function
#' @name registry
#' @export
NULL
            

resource-caching_layer.R

              
            
              #' Cache a resource's parsed value.
#'
#' More complex resources are typically time-consuming to compile, or
#' are unnecessary to compile more than once per R session.
#'
#' The \code{caching_layer} provides an internal caching mechanism that
#' will remember the value of the parsed resource and re-use it
#' \emph{unless the resource or any of its dependencies have been
#' modified} (see \link{dependency_tracking} for an explanation of
#' how this is accomplished).
#'
#' @name resource caching
#' @aliases caching_layer
#' @param object active_resource. See \code{\link{active_resource}}.
#' @param ... additional parameters to pass to the next layer in the resource
#'    parsing tower.
#' @param recompile. logical. Whether or not to force the resource to
#'    be recompiled (instead of retrieved from cache), regardless of
#'    whether the resource or any of its dependencies have been modified.
#' @seealso \code{\link{active_resource}}, \code{\link{tower}}
#' @return The parsed resource, retrieved from cache since the last time
#'    the resource was executed if and only if the resource's file(s) and
#'    none of its (recursive) dependencies have been modified.
#' @note The parameters must be named \code{object} and \code{...} due to
#'    this method's inclusion in a \code{\link{tower}}.
#' @examples
#' \dontrun{
#'   # Imagine we are constructing a stagerunner from a sequence of functions.
#'   # However, some of those functions have been built by other resources.
#'   # Imagine the following structure.
#'   # (See github.com/robertzk/stagerunner for an explanation of stagerunners.)
#'
#'   #=== /dir/runners/project1.R ===
#'   list(
#'     "import data"  = resource("importers/db"),   # These are some functions
#'     "munge data"   = resource("mungers/impute"), # built by the user
#'     "create model" = resource("models/lm"),      # that live in other
#'     "export model" = resource("exporters/file")  # files.
#'   )
#'
#'   #=== R console ===
#'   d <- director("/dir") # Create a director object.
#'   d$register_parser("runners/", function(output) {
#'     stagerunner::stageRunner$new(new.env(), output)
#'   }, cache = TRUE) # Note the cache = TRUE argument.
#'
#'   sr  <- d$resource("runners/project1") # A fresh new stageRunner!
#'   sr2 <- d$resource("runners/project1") # Same one, since it used the cache.
#'   stopifnot(identical(sr, sr2))
#'
#'   # We can use base::Sys.setFileTime to pretend like we updated the
#'   # modified time of the /dir/connections/dev.R file, triggering
#'   # the caching layer to re-compile the resource.
#'   Sys.setFileTime(file.path(d$root(), "runners", "project1.R"),
#'     Sys.time() - as.difftime(1, units = "mins"))
#'
#'   sr3 <- d$resource("runners/project1") # Now it re-builds the runner.
#'   stopifnot(!identical(sr, sr3)) # A new runner, with hardly any work!
#' }
caching_layer <- function(object, ..., recompile. = FALSE) {
  # Ideally, caching should be as smart as preprocessor/parser routing
  # but this is a cheap way to do it now.
  caching_enabled <- any_is_substring_of(
    object$resource$name,
    object$resource$director$cached_resources()
  )

  if (!caching_enabled) {
            

We skip the caching layer completely if this is not a cached resource.

                  yield()
  } else {
            

If this resource has been parsed before but any of its dependencies have been modified, we should wipe the cache.

                  is_cached <- 
      !isTRUE(recompile.) && 
      !isTRUE(object$injects$any_dependencies_modified) &&
            

This will exist if and only if the resource has been parsed and its value has been stored before.

                    base::exists("caching_layer.value", envir = object$state)

    if (is_cached) {
            

This is used down the road by the stop_tracking_dependencies helper of the dependency_tracker.

                    object$injects$cache_used <- TRUE
      object$state$caching_layer.value
    } else {
            

Note that wrapping an expression in parentheses in R will prevent its output from becoming invisible.

                    (object$state$caching_layer.value <- yield())
    }
  }
}

            

resource-dependency_tracker.R

              
            

Before reading this file, you should probably take a look at resource-modification_tracker.R.

              #' Track the dependencies of a resource.
#'
#' More complex resources are often built from simpler resources. It is 
#' the responsibility of the \code{dependency_tracker} to determine
#' whether any dependencies have been modified.
#'
#' The \code{dependency_tracker} is very good at its job and can track
#' arbitrarily nested dependencies (for example, if resource \code{"foo"}
#' needs resource \code{"bar"} who needs resource \code{"baz"}, etc.).
#' But beware! The \code{dependency_tracker} won't tolerate circular
#' dependencies with anything except tears of anguish.
#'
#' The local \code{any_dependencies_modified} is injected by the 
#' \code{dependency_tracker} for use in the preprocessor or parser
#' of a resource. Note this is based off the dependencies \emph{last time}
#' the resource was executed, since it is impossible to know a priori
#' what the dependencies will be prior to sourcing the resource's file.
#'
#' The local \code{dependencies}, a character vector of (recursive)
#' dependencies is also injected.
#'
#' @name dependency tracking
#' @aliases dependency_tracker
#' @param object active_resource. See \code{\link{active_resource}}.
#' @param ... additional parameters to pass to the next layer in the resource
#'    parsing tower.
#' @param dependency_tracker.return. What to return in this layer
#'    of the parsing tower. The options are \code{"dependencies"},
#'    \code{"any_dependencies_modified"}, and \code{"object"}.
#'  
#'    The former returns the list of recursive dependencies of the resource,
#'    as of last time the resource was executed.
#'   
#'    Choosing \code{"any_dependencies_modified"} will answer whether any
#'    of the files associated with the dependencies, \emph{or the resource
#'    itself}, have been modified.
#'
#'    The last (default) choice, \code{"object"}, will return the parsed
#'    resource's value as usual by proceeding with the resource parsing
#'    tower.
#' @seealso \code{\link{active_resource}}, \code{\link{resource_caching}},
#'    \code{\link{tower}}
#' @return The parsed resource.
#' @note The local \code{any_dependencies_modified} rarely needs to be
#'    used by a preprocessor or parser. You should usually use 
#'    \code{resource caching} instead.
#'
#'    The parameters must be named \code{object} and \code{...} due to
#'    this method's inclusion in a \code{\link{tower}}.
#' @examples
#' \dontrun{
#'   # Imagine we are constructing a stagerunner from a sequence of functions.
#'   # However, some of those functions have been built by other resources.
#'   # Imagine the following structure.
#'   # (See github.com/robertzk/stagerunner for an explanation of stagerunners.)
#'
#'   #=== /dir/runners/project1.R ===
#'   list(
#'     "import data"  = resource("importers/db"),   # These are some functions
#'     "munge data"   = resource("mungers/impute"), # built by the user
#'     "create model" = resource("models/lm"),      # that live in other
#'     "export model" = resource("exporters/file")  # files.
#'   )
#'
#'   #=== /dir/importers/db.R ===
#'   conn <- resource("connections/dev") # A list representing a connection
#'     # setting to a development database.
#'   DBI::dbReadTable(conn, "some_table")
#'
#'   #=== /dir/connections/dev.R
#    # Some file that sets up and returns a database connection.
#'
#'   #=== R console ===
#'   d <- director("/dir") # Create a director object.
#'   d$register_preprocessor("runners/",
#'     function(director, source, any_dependencies_modified) {
#'       # `any_dependencies_modified` has been set by the dependency_tracker to
#'       # TRUE or FALSE according as /dir/runners/project1.R *or any of its
#'       # dependencies* has been modified.
#'       if (any_dependencies_modified ||
#'           is.null(runner <- director$cache_get("last_runner"))) {
#'         # Construct a new stageRunner, since a dependency has been modified.
#'         source()
#'       } else { runner }
#'   })
#'
#'   d$register_parser("runners/", function(output) {
#'     # If it is a stageRunner, it must have been retrieved from the cache.
#'     if (stagerunner::is.stageRunner(output)) { return(output) }
#'     runner <- stagerunner::stageRunner$new(new.env(), output)
#'  
#'     # Cache the runner so it is available in the preprocessor next time.
#'     # As long as the /dir/runners/project1.R file remains untouched, we will
#'     # not have to bother re-sourcing the file and hence reconstructing the
#'     # stageRunner.
#'     director$cache_set("last_runner", runner)
#'     runner
#'   })
#'
#'   sr  <- d$resource("runners/project1") # A fresh new stageRunner!
#'   sr2 <- d$resource("runners/project1") # Same one, since it used the cache.
#'   stopifnot(identical(sr, sr2))
#'
#'   # We can use base::Sys.setFileTime to pretend like we updated the
#'   # modified time of the /dir/connections/dev.R file, triggering
#'   # `any_dependencies_modified = TRUE`.
#'   Sys.setFileTime(file.path(d$root(), "connections", "dev.R"),
#'     Sys.time() - as.difftime(1, units = "mins"))
#'
#'   sr3 <- d$resource("runners/project1") # Now it re-builds the runner.
#'   stopifnot(!identical(sr, sr3)) # A new runner!
#' }
dependency_tracker <- function(object, ..., dependency_tracker.return = "object") {
  if (identical(dependency_tracker.return, "any_dependencies_modified")) {
    any_dependencies_modified(object)
  } else if (identical(dependency_tracker.return, "dependencies")) {
    dependencies(object)
  } else {
            

While a resource is being sourced it can reference other resources. For example, it could use the resource(...) function provided in the resource's sourcing environment. Its parser or preprocessor could call director$resource(...) directly to load another resource.

In other words, while the execution of this resource is on the call stack, whole swaths of other resources may be getting processed. To “remember” what resources have been referenced we will use some unfortunately necessary global state in the begin_tracking_dependencies helper, and then undo our work in stop_tracking_dependencies.

                  begin_tracking_dependencies(object)

    value <- yield()
    
    stop_tracking_dependencies(object)

    value
  }
}

dependency <- function(nesting_level, resource_name) {
  structure(class = "directorDependency", list(
    level = nesting_level,
    resource_name = resource_name
  ))
}

any_dependencies_modified <- function(active_resource) {
            

If the resource has ever been parsed before, we will remember its dependencies in state$dependency_tracker.dependencies (as a character vector of resource names).

                dependencies <- active_resource$state$dependency_tracker.dependencies %||% character(0)

            

The resource itself need never consider itself as a dependency.

                dependencies <- setdiff(dependencies, active_resource$resource$name)

            

Recall that the modified local was injected back in the modification_tracker.

                modified <- active_resource$injects$modified

            

We recursively determine if this resource or any of its dependencies have been modified.

                is_modified <- function(name) {
            

We have to set modification_tracker.touch = FALSE to not disturb the modification_tracker.queue – this is a read-only operation and should not update any cached modification times!

                  active_resource$resource$director$resource(name,
      modification_tracker.touch = FALSE,
      dependency_tracker.return  = "any_dependencies_modified"
    )
  }
  modified || any(vapply(dependencies, is_modified, logical(1)))
}

dependencies <- function(active_resource) {
  dependencies <- active_resource$state$dependency_tracker.dependencies %||% character(0)

  nested_dependencies <- lapply(
    dependencies,
    active_resource$resource$director$resource,
    modification_tracker.touch = FALSE,
    dependency_tracker.return  = "dependencies"
  )
  
  # Some resources may depend on the same dependencies, so we `unique`
  # at the end to jiggle those away.
  unique(c(recursive = TRUE, dependencies, nested_dependencies))
}

            

This function is purely used for its side effects.

              begin_tracking_dependencies <- function(active_resource) {
  director <- active_resource$resource$director

            

First, we provide whether or not the resource or its dependencies as of the last time this resource was executed have been modified. (After all, we can't know a priori what its dependencies are before executing it.)

                any_modified <- director$resource(
    active_resource$resource$name,
    virtual_check.skip         = TRUE,
    dependency_tracker.return  = "any_dependencies_modified",
    modification_tracker.touch = FALSE
  )

  active_resource$injects %<<% list(any_dependencies_modified = any_modified)

            

We will create a stack data structure to keep track of currently processed resources. Recall that director_state is an environment within the director package namespace that is used to explicitly represent state global to the R session.

                if (!base::exists("dependency_stack", envir = director_state)) {
            

The “shtack” is defined in utils.R

                  director_state$dependency_stack <- shtack$new()
  }

            

The nesting_level will keep track of how far down the “resource call stack” we are. Every time we process a resource while amidst the processing of a parent resource, this number will get incremented by one.

                nesting_level <- director_state$dependency_nesting_level %||% 0
  if (nesting_level > 0L) {
            

If this is not a top-level resource (i.e., one called directly using director$resource rather than from another resource), push this dependency onto the stack.

We do not need to push top-level resources onto the stack since they aren't anyone's dependency!

                  director_state$dependency_stack$push(
      dependency(nesting_level, active_resource$resource$name)
    )
  } else {
    # TODO: (RK) Explain this tricky point.
    director_state$defining_environment <- 
      active_resource$resource$defining_environment
  }

  director_state$dependency_nesting_level <- nesting_level + 1
}

stop_tracking_dependencies <- function(active_resource) {
  director <- active_resource$resource$director

            

Recall the nesting level from start_dependency_tracker. It has been incremented by one until this method finishes.

                nesting_level <- director_state$dependency_nesting_level

            

This is the key to determining a resource's immediate (as opposed to recursive) dependencies. Say we have “foo” which calls “bar” whichs calls “baz”. The nesting level will get incremented once during the execution of “bar” and once again during “baz”.

Thus, we can tell “bar” is an immediate direct dependency of “foo” which “baz” is an implicit dependency through “bar” by noting the nesting level.

                dependencies <- Filter(
    function(dependency) dependency$level == nesting_level, 
    director_state$dependency_stack$peek(TRUE) # everything on the stack
  )

            

If retrieved the resource from cache in the caching_layer we should not update its dependencies, since it will look like it has none.

                if (!isTRUE(active_resource$injects$cache_used)) {
            

Now that we have snatched the correct dependencies in the previous statement, we just extract their proper resource names.

                  active_resource$state$dependency_tracker.dependencies <-
      vapply(dependencies, getElement, character(1), "resource_name")
  }

            

This is after all a dependency stack. We can remove all the dependencies of this resource by popping those with the current nesting level. Those with higher nesting levels will have been recursively popped off already.

                while (!director_state$dependency_stack$empty() &&
         director_state$dependency_stack$peek()$level == nesting_level) {
    director_state$dependency_stack$pop()
  }

            

And we're back, cap'n.

                director_state$dependency_nesting_level <- nesting_level - 1
}
            

resource-modification_tracker.R

              
            

Take a look at resource-virtual_checker.R prior to reading this file.

              #' Detect modifications to a resource.
#'
#' The \code{modification_tracker} is responsible for determining whether
#' any changes have been made to the file(s) associated to the given
#' resource.
#'
#' If a modification has been detected, the local \code{modified}
#' will be injected for use in the preprocessor or parser for the resource.
#'
#' Note that we can use the \code{modification_tracker} to determine
#' whether the resource has been modified:
#'
#' \code{director_object$resource(resource_name,
#'   modification_tracker.touch = FALSE,
#'   modification_tracker.return = "modified")}
#' 
#' The use of \code{modification_tracker.touch = FALSE} is necessary to avoid
#' polluting the internal cache that determines whether or not the resource
#' has been modified.
#' 
#' @name modification tracking
#' @aliases modification_tracker
#' @param object active_resource. See \code{\link{active_resource}}.
#' @param ... additional parameters to pass to the next layer in the resource
#'    parsing tower.
            

The incorporation of these parameters is a little suspect, but I saw no no other way to retain the elegance and composability of the tower abstraction while providing the ability to fetch information from “halfway down the stream.”

A possible alternative is to implement the equivalent of Scala's “Breaks” by exploiting the R condition system.

              #' @param modification_tracker.return character. What to return in this layer
#'    of the parsing tower. The options are \code{c("modified", "object")}.
#'
#'    The former returns whether or not the file associated with the resource
#'    has been modified (or in the case of idempotent resources, the file and
#'    its helpers). The resource itself will not be parsed.
#'
#'    The latter, \code{"object"}, will parse the resource as usual. This is
#'    the default.
#' @param modification_tracker.touch logical. Whether or not to update an
#'    internal cache that keeps track of last time the resource is modified.
#'    This is an internal parameter that is set to \code{FALSE} by recursive
#'    calls to \code{director$resource} to avoid polluting the modification
#'    cache while investigating dependency changes. The default is \code{TRUE}.
#' @seealso \code{\link{active_resource}}, \code{\link{tower}}
#' @return The parsed resource.
#' @note The parameters must be named \code{object} and \code{...} due to
#'    this method's inclusion in a \code{\link{tower}}.
#' @examples
#' \dontrun{
#'   # Imagine we are constructing a stagerunner from a sequence of functions.
#'   # However, some of those functions have been built by other resources.
#'   # Imagine the following structure.
#'   # (See github.com/robertzk/stagerunner for an explanation of stagerunners.)
#'
#'   #=== /dir/runners/project1.R ===
#'   list(
#'     "import data"  = resource("importers/db"),   # These are some functions
#'     "munge data"   = resource("mungers/impute"), # built by the user
#'     "create model" = resource("models/lm"),      # that live in other
#'     "export model" = resource("exporters/file")  # files.
#'   )
#'
#'   #=== R console ===
#'   d <- director("/dir") # Create a director object.
#'   d$register_preprocessor("runners/", function(director, source, modified) {
#'     # `modified` has been set by the modification_tracker to
#'     # TRUE or FALSE according as /dir/runners/project1.R has been modified.
#'     if (modified || is.null(runner <- director$cache_get("last_runner"))) {
#'       # Construct a new stageRunner, since the file has been modified.
#'       source()
#'     } else { runner }
#'   })
#'
#'   d$register_parser("runners/", function(output) {
#'     # If it is a stageRunner, it must have been retrieved from the cache.
#'     if (stagerunner::is.stageRunner(output)) { return(output) }
#'     runner <- stagerunner::stageRunner$new(new.env(), output)
#'  
#'     # Cache the runner so it is available in the preprocessor next time.
#'     # As long as the /dir/runners/project1.R file remains untouched, we will
#'     # not have to bother re-sourcing the file and hence reconstructing the
#'     # stageRunner.
#'     director$cache_set("last_runner", runner)
#'     runner
#'   })
#'
#'   sr  <- d$resource("runners/project1") # A fresh new stageRunner!
#'   sr2 <- d$resource("runners/project1") # Same one, since it used the cache.
#'   stopifnot(identical(sr, sr2))
#'
#'   # We can use base::Sys.setFileTime to pretend like we updated the
#'   # modified time of the project1.R file, triggering `modified = TRUE`.
#'   Sys.setFileTime(file.path(d$root(), "runners", "project1.R"),
#'     Sys.time() - as.difftime(1, units = "mins"))
#'
#'   sr3 <- d$resource("runners/project1") # Now it re-builds the runner.
#'   stopifnot(!identical(sr, sr3)) # A new runner!
#' }
modification_tracker <- function(object, ..., modification_tracker.return = "object",
                                 modification_tracker.touch = TRUE) {
  director <- object$resource$director

            

We injected virtual in the previous layer, virtual_checker, using object$injects %<<% list(virtual = virtual)

                if (isTRUE(object$injects$virtual)) {
            

Virtual resources are never considered to be modified, since we have have no corresponding file and we have no way to tell.

                  if (identical(modification_tracker.return, "modified")) {
      FALSE
    } else if (identical(modification_tracker.return, "mtime")) {
      NULL
    } else {
      object$injects %<<% list(modified = FALSE)
      yield()
    }
  } else {
            

In order to keep track of whether the resource has been modified, we will need to store the previous and current modification time and compare them to see if anything has changed.

To this end, we make use of a sized_queue, a queue (in this case of length 2) where pushing a new element pops off the last if there are more than 2. This may seem extravagant, but will make it trivial to extend director to keep track of longer modification histories if we ever need to do so.

We use base::exists instead of exists to make it clear we are not calling exists on the director object.

                  if (!base::exists("modification_tracker.queue", envir = object$state)) {
      object$state$modification_tracker.queue <- sized_queue(size = 2)
    }

    mtime <- determine_last_modified_time(object)

            

The modification_tracker.touch argument says we would like to explicitly modify the allocated queue of modification times. This means that we will have to wait until the resource's file changes again to mark it as modified.

                  if (isTRUE(modification_tracker.touch)) {
      object$state$modification_tracker.queue$push(mtime)
            

In this case, a resource has been modified if its modification time is not the same as last time, i.e., the first and second element in the queue are not identical.

                    modified <- !do.call(identical,
        lapply(seq(2), object$state$modification_tracker.queue$get)
      )
    } else {
      modified <- !identical(object$state$modification_tracker.queue$get(1), mtime)
    }

    # Ferry whether or not the resource has been modified to the preprocessor
    # and parser down the stream.
    object$injects %<<% list(modified = modified)

    if (identical(modification_tracker.return, "modified")) {
      object$injects$modified
    } else if (identical(modification_tracker.return, "mtime")) {
      mtime
    } else {
      yield()
    }
  }
}

determine_last_modified_time <- function(active_resource) {
  director <- active_resource$resource$director

            

It only makes sense to check for modifications (and thus query the file system) if the current time is later than the last modification time–otherwise, the file could not possibly have been updated yet.

The operator %|||% is defined in utils.R and means “if the former argument is NULL or NA, use the latter instead.”

                if (Sys.time() > active_resource$state$modification_tracker.queue$get(1) %|||% 0) {
            

We use the director$filename method to obtain the file associated with the resource. By setting enclosing = TRUE, we ensure that in the case of idempotent resources this will be directory instead of the .R file (e.g. “foo” instead of “foo/foo.R”).

                  filename <- director$filename(active_resource$resource$name, check.exists = FALSE,
                                  absolute = TRUE, enclosing = TRUE)
    if (is.idempotent_directory(filename)) {
            

We use the get_helpers function in utils.R to get all the helper files. If the resource file (“foo/foo.R”) or its helpers (“foo/helper.R”, etc) have been modified, it will be reflected in the maximum of their modification times.

                    files <- c(filename, get_helpers(filename, full.names = TRUE, leave_idempotent = TRUE))
      mtime <- max(file.info(files)$mtime, na.rm = TRUE)
    } else {
      mtime <- file.info(filename)$mtime
    }
  } else {
            

In this case, the current timestamp is <= the last modified time, so nothing could have possibly changed yet. We use the cached modification time from the queue defined earlier.

                  mtime <- active_resource$state$modification_tracker.queue$get(1)
  }

  mtime
}

            

resource-parser.R

              
            
              #' Apply the parser to a resource.
#'
#' @name parser
#' @aliases parsers
#' @param object active_resource. See \code{\link{active_resource}}.
#' @param ... additional parameters to pass to the next layer in the resource
#'    parsing tower.
#' @param parse. logical. Whether or not to apply the \link{parser} to the
#'    resource. If \code{FALSE}, this is equivalent to sourcing the resource's
#'    file without running its parser. By default, \code{TRUE}.
#' @return The parsed resource, usually some useful R object.
#' @note The parameters must be named \code{object} and \code{...} due to
#'    this method's inclusion in a \code{\link{tower}}.
parser <- function(object, ...) {
  director <- object$resource$director

  route <- director$match_parser(object$resource$name)

  if (is.null(route)) {
    # No parser for this resource.
    # Use the default parser, just grab the value.
    object <- object$preprocessed$value
  } else {
    args <- list(...)
    # TODO: (RK) Use `args` to carry around to prevent partial matching
    # on "object" ...
    object <- apply_parser(object, route, args)
  }

            

This will yield to the identity function, simply returning object and thus the parsed resource. Every function in a tower (see tower.R) must call yield. This also encourages us to plan ahead if we ever want to add even further layers in the resource parsing tower.

                yield()
}

apply_parser <- function(active_resource, route, args) {
  director <- active_resource$resource$director

  parser_function <- director$parser(route)

  sourcing_env <-
    new.env(parent = director_state$defining_environment) %<<%
    environment(parser_function)

  environment(parser_function) <-
    new.env(parent = sourcing_env) %<<%
           active_resource$injects %<<% list(
    # TODO: (RK) Intersect with parser formals.
    # TODO: (RK) Use alist so these aren't evaluated right away.
     resource = active_resource$resource$name,
     input = active_resource$state$preprocessor.source_env,
     output = active_resource$preprocessed$value,
     director = director,
     preprocessor_output = active_resource$preprocessed$preprocessor_output,
     filename = active_resource$injects$filename,
     args = args,
     "%||%" = function(x, y) if (is.null(x)) y else x
  )
  parser_function()
}

            

resource-preprocessor.R

              
            
              #' Apply the preprocessor to a resource.
#'
#' Hand in hand with parsers, preprocessors are the heart of director. The idea
#' is to allow the developer to do any additional stuff prior to sourcing an R
#' file. For example, if some helper functions are desired or some operators
#' should be overloaded for a DSL (domain-specific language), the preprocessor
#' can perform this prior to sourcing the R file.
#' 
#' To define a preprocessor for routes in a given path, you can use the
#' \code{\link{register_preprocessor}} method on the director object.
#'
#' @name preprocessor
#' @aliases preprocessors
#' @param object active_resource. See \code{\link{active_resource}}.
#' @param ... additional parameters to pass to the next layer in the resource
#'    parsing tower.
#' @param parse. logical. Whether or not to apply the \link{parser} to the
#'    resource. If \code{FALSE}, this is equivalent to sourcing the resource's
#'    file without running its parser. By default, \code{TRUE}.
#' @seealso \code{\link{register_preprocessor}}, \code{\link{active_resource}},
#'    \code{\link{tower}}
#' @return The parsed resource if \code{parse. = TRUE}, and the preprocessed
#'    resource otherwise.
#' @note The parameters must be named \code{object} and \code{...} due to
#'    this method's inclusion in a \code{\link{tower}}.
preprocessor <- function(object, ..., parse. = TRUE) {
  director <- object$resource$director

            

Find the character string representing the preprocessor route used for preprocessing the resource.

                route <- director$match_preprocessor(object$resource$name)

  if (isTRUE(object$injects$virtual)) {
            

Virtual resources are by definition those with no corresponding filename. See the virtual_checker.

                  filename <- NULL
  } else {
            

We place the filename in the object's injects to make it available to the parser down the stream.

                  filename <- object$injects$filename <-
      normalizePath(director$filename(object$resource$name,
                                      absolute = TRUE, check.exists = FALSE))
  }

  object$injects %<<% list(
    # TODO: (RK) Use alist so these aren't evaluated right away.
    root = director$root,
    # TODO: (RK) Use find_director helper to go off root + project_name
    # TODO: (RK) Determine how to handle defining_environment. problems here.
    resource = function(...) {
      defining_environment <- parent.frame()
      director$resource(..., defining_environment. = defining_environment)
    },
    resource_name = object$resource$name,
    resource_exists = function(...) director$exists(...),
    helper = function(...) {
      director$resource(..., parse. = FALSE, virtual_check.skip = TRUE)
    }
  )

  if (is.null(route)) {
    object$preprocessed <- default_preprocessor(object) 
  } else {
    object$preprocessed <- apply_preprocessor_route(object, route, list(...))
  }

  if (isTRUE(parse.)) {
    yield() # Apply parser.
  } else {
    object$preprocessed$value
  }
}

default_preprocessor <- function(active_resource) {
  # There is no preprocessor for this resource, so we
  # use the default preprocessor, base::source.
  default_preprocessor_fn <- function(filename) {
    # TODO: (RK) Figure out correct environment assignment.
    base::source(filename, local = source_env, keep.source = TRUE)$value
  }

  parent_env <- active_resource$resource$defining_environment
  if (!identical(topenv(parent_env), baseenv())) {
    parent_env <- parent.env(topenv(parent_env))
  } else {
    parent_env <- parent.env(parent_env)
  }

  source_env <- new.env(parent = parent_env)
  # source_env <- new.env(parent = parent.env(topenv(parent.env(environment()))))
  source_env %<<% active_resource$injects
  active_resource$state$preprocessor.source_env <- source_env

  environment(default_preprocessor) <- new.env(
    parent = active_resource$resource$defining_environment
  ) %<<% list(source_env = source_env)

  list(
    value               = default_preprocessor_fn(active_resource$injects$filename),
    preprocessor_output = new.env(parent = emptyenv())
  )
}

apply_preprocessor_route <- function(active_resource, route, args, filename) {
  director <- active_resource$resource$director

  active_resource$state$preprocessor.source_env <- new.env(parent = active_resource$injects)

  preprocessor_output <- new.env(parent = emptyenv())
  preprocessor_function <- director$preprocessor(route)

  sourcing_env <-
    new.env(parent = director_state$defining_environment) %<<%
    environment(preprocessor_function)

  environment(preprocessor_function) <-
    new.env(parent = sourcing_env) %<<%
    active_resource$injects %<<% list(
      # TODO: (RK) Intersect with preprocessor formals.
      # TODO: (RK) Use alist so these aren't evaluated right away.
       resource = active_resource$resource$name,
       director = director,
       args = args,
       filename = active_resource$injects$filename,
       source_env = active_resource$state$preprocessor.source_env,
       source = function() eval.parent(quote({
        if (!is.character(filename)) {
          stop("Director of project ", sQuote(crayon::yellow(director$root())),
               " attempted to source filename of class ", class(filename)[1L], call. = FALSE)
        }
        if (!file.exists(filename)) {
          stop("Director of project ", sQuote(crayon::yellow(director$root())),
               " attempted to source ", sQuote(crayon::red(filename)),
               ", but this file does not exist.", call. = FALSE)
        }
        base::source(filename, source_env, keep.source = TRUE)$value
       })),
       preprocessor_output = preprocessor_output,
       "%||%" = function(x, y) if (is.null(x)) y else x
    )

  list(
    value = preprocessor_function(),
    preprocessor_output = preprocessor_output
  )
}

            

resource-virtual_check.R

              
            
              #' Mark a resource as virtual.
#'
#' Virtual resources are those that are not recorded as a .R file. Instead,
#' the resource's value must be computed using a preprocessor.
#'
#' For example, imagine we have a directory of resources where some of the
#' resources have been re-factored into a package. We would still like to be
#' able to turn objects from that package into proper resources, but they
#' may no longer be encoded as files in the Syberia project.
#'
#' Instead, we could define a preprocessor that looks for those special values
#' and uses the package objects instead.
#'
#' When parsing a resource, the local \code{virtual} is injected for use in
#' the preprocessor which corresponds to whether the resource seems
#' non-existent to the director (i.e., has no supporting .R file).
#'
#' @name virtual resource
#' @aliases virtual_check
#' @param object active_resource. See \code{\link{active_resource}}.
#' @param ... additional parameters to pass to the next layer in the resource
#'    parsing tower.
#' @param virtual_check.skip logical. Whether or not to skip the virtual
#'    check entirely. Generally only used by internal calls.
#' @seealso \code{\link{active_resource}}, \code{\link{tower}}
#' @return The parsed resource.
#' @note The parameters must be named \code{object} and \code{...} due to
#'    this method's inclusion in a \code{\link{tower}}.
#' @examples
#' \dontrun{
#'   # We will use the example of a syberia project.
#'   # See github.com/robertzk/syberia.
#'
#'   # lib/mungebits has imputer.R and no other files, but the package
#'   # syberiaMungebits has more mungebits. We can define the following
#'   # preprocessor.
#'
#'   #=== config/routes.R ===
#'   list(
#'     "lib/mungebits" = "mungebits"
#'   )
#'
#'   #=== lib/controllers/mungebits.R ===
#'   preprocessor <- function(resource, virtual) { 
#'     mungebit <- basename(resource) # lib/mungebits/discretizer becomes discretizer
#'     if (virtual) {
#'       if (exists(mungebit, envir = getNamespace("syberiaMungebits"), inherits = FALSE)) {
#'          # The function exists in the syberiaMungebits package.
#'          get(mungebit, envir = getNamespace("syberiaMungebits")))))
#'        } else {
#'          stop("No mungebit called ", sQuote(resource))
#'        }
#'     } else {
#'       source() # Source the mungebit file as usual
#'     }
#'   }
#'
#'   # Construct the mungebit parser as usual.
#'   function(output) { mungebits::mungebit(output$train, output$predict) }
#'
#'   #=== R console ===
#'   d <- syberia_project("/some/dir")
#'   d$resource("lib/mungebits/imputer") # Will use lib/mungebits/imputer.R
#'   d$resource("lib/mungebits/discretizer") # Will use syberiaMungebits::discretizer
#' }
virtual_check <- function(object, ..., virtual_check.skip = FALSE) {
  if (isTRUE(virtual_check.skip)) { return(yield()) }

  director <- object$resource$director

            

An object is considered to be “virtual” if it has no corresponding file, that is, the director object cannot find a resource by that name.

                virtual  <- !director$exists(object$resource$name)
    
            

Since object$injects is an environment, this is equivalent to object$injects$virtual <- virtual, but this notation is clearer as it is stylistically similar to the later layers in the resource parsing tower where multiple values are injected simultaneously.

                object$injects %<<% list(virtual = virtual)
  

            

If a resource is virtual but has no preprocessor, we cannot possibly parse it, as the default preprocessor is simply sourcing the file corresponding to the resource.

                if (virtual && !director$has_preprocessor(object$resource$name)) {
    project_name <- director$project_name()
    stop(sprintf("Cannot find resource %s in%s project %s.",
      sQuote(crayon::red(object$resource$name)),
      if (nzchar(project_name)) paste0(" ", project_name) else "",
      sQuote(crayon::blue(director$root()))))
  }

            

See tower.R for an explanation of yield.

                yield()
}

            

search_pattern.R

              
            
              #' Define a search pattern for use with the find method on a director.
#'
#' A search pattern is one of the following:
#'
#'   \describe{
#'     \item{exact}{ match. The strings must match exactly this value.}
#'     \item{partial}{ match. The strings which contain this string as
#'        a substring will be matched.}
#'     \item{wildcard}{ match. Fuzzy matching like in the ctrl+p plugin
#'        for vim. If the pattern is "abc", it will be translated to the
#'        regular expression ".*a.*b.*c.*", that is, any characters followed
#'        by an 'a' followed by any characters followed by a 'b' followed by
#'        any characters followed by a 'c' followed by any characters (e.g.,
#'        "fabulous cake"). Note that wildcard match is case insensitive.}
#'      \item{regex}{ match. Apply a regular expression filter to the
#'        set of strings.}
#'   }
#' 
#' @param pattern character. The pattern to search for.
#' @param method character. The search pattern method, one of "exact",
#'    "partial", "wildcard", or "regex".
#' @note Patterns can be combined using the \code{|} and \code{&} operators.
#' @examples
#' \dontrun{
#'   d$find(search_pattern("this/file", "exact"))
#'   # If d is a director object, the above will find exactly the resource
#'   # "this/file".
#' 
#'   d$find(search_pattern("this", "partial"))
#'   # The above will find any resource containing "this" as a substring.
#'
#'   d$find(search_pattern("this", "wildcard"))
#'   # The above will find any resource containing the consecutive letters
#'   # "this" separated by arbitrary strings.
#'
#'   d$find(search_pattern("foobar", "partial") | search_pattern("root", "exact"))
#'   # The above will find any resource with the substring "foobar" or having
#'   # exactly the name "root".
#' }
search_pattern <- function(pattern, method) {
  msg <- function(x) {
    stop("Search ", deparse(substitute(x)) ," must be of type character; ",
         "instead I got a ", class(x)[1])
  }

  if (!is.character(method)) { msg(method) }
  if (!is.character(pattern)) { msg(pattern) }

            

A search pattern is a method for filtering a set of strings that is highly composable. For example, if we have c("foobar", "barbaz", "bazbux"), we can use the pattern search_pattern("bar", "partial") to select the first two, since they have the substring “bar”.

We can apply and and or operations to search patterns to mix and match them. For example, search_pattern("bar", "partial") & search_pattern("baz", "wildcard") will match strings that contain the substring “bar”, as well as the characters “b”, “a”, and “z” separated by arbitrary strings (e.g., “BAzaR”).

                search_pattern_(pattern, tolower(method))
}

search_pattern_ <- function(pattern, method) {
            

We use a recursive solution: the pattern can be a “search pattern join” (the & and | operation described above). In this case, we just return the join.

                if (is.search_pattern_join(pattern)) { pattern }
  else if (length(pattern) > 1) {
            

If there is more than one pattern specified, we treat this as an OR condition: either pattern 1, or pattern 2, etc.

                  Reduce(function(x, y) {
      search_pattern_(x, method) | search_pattern_(y, method)
    }, pattern)
  } else if (length(method) > 1) {
            

If there is more than one method specified, this is also an OR condition. This situation is rare, since we don't often want to say “match this as a wildcard or as a regex”.

                  Reduce(function(x, y) {
      search_pattern_(pattern, x) | search_pattern_(pattern, y)
    }, method)
  } else {
    `verify_search_pattern_method!`(method)
            

We use an S3 class to track information about the pattern (the string to match and the method).

                  as.search_pattern(list(pattern = pattern, method = method))
  }
}

`verify_search_pattern_method!` <- function(method) {
            

getNamespace is a base R function that allows us to grab the namespace of the director package. To understand the difference between a package environment and package namespace, see Suraj Gupta's wonderful guide on how R searches and finds stuff.

Instead of hardcoding all the pattern methods we support like “exact” and “wildcard”, we look into this package's namespace and see if there is an “apply_pattern.exact” or “apply_pattern.wildcard” function. If someone wants to implement a new pattern method, they only need to define an “apply_pattern.new_method” function below, which is cleaner.

                ok <- exists(paste0("apply_pattern.", method), envir = getNamespace("director"))
  if (!ok) { stop("Invalid search pattern.") }
}

search_pattern_join <- function(pattern1, pattern2, type) {
  stopifnot(identical(type, "and") || identical(type, "or"))
            

An S3 object that tracks an & or | condition on patterns.

                as.search_pattern(structure(list(pattern1, pattern2, type = type),
                              class = c("search_pattern_join")))
}

as.search_pattern <- function(x) {
            

Remember that when changing classes, the class should be prepended rather than appended, since R's S3 mechanism looks left-to-right for S3 methods.

                class(x) <- c("search_pattern", class(x))
  x
}

is.search_pattern <- function(x) { is(x, "search_pattern") }
is.atomic_search_pattern <- function(x) {
            

An atomic search pattern is one that has not been joined using the & or | operators.

                is.search_pattern(x) && !is.search_pattern_join(x)
}
is.search_pattern_join <- function(x) { is(x, "search_pattern_join") }

            

This funky looking notation says “implement the | operator for the "search_pattern” S3 class.

              `|.search_pattern` <- function(e1, e2) {
  stopifnot(is(e2, "search_pattern"))

  search_pattern_join(e1, e2, type = "or")
}

`&.search_pattern` <- function(e1, e2) {
  stopifnot(is(e2, "search_pattern"))

  search_pattern_join(e1, e2, type = "and")
}

#' Apply a pattern filter to a character vector.
#' 
#' @param pattern search_pattern.
#' @param strings character. The strings to filter down.
apply_pattern <- function(pattern, strings) {
  if (is.atomic_search_pattern(pattern)) {
            

First we apply the pattern's method as an S3 class. For example, a wildcard pattern would get the “wildcard” class.

                  class(pattern) <- c(pattern$method, class(pattern)) 

            

R's UseMethod function dispatches an S3 generic. This means that we will call apply_pattern.wildcard on the pattern object without having to figure out that is the appropriate method.

                  UseMethod("apply_pattern", object = pattern)
  } else if (is.search_pattern_join(pattern)) {
    operand <- if (pattern$type == "and") { intersect } else { union }
            

Recall is an R shortcut for “recursively call this function”, i.e., apply_pattern(...).

                  operand(Recall(pattern[[1]], strings), Recall(pattern[[2]], strings))
  } else { stop("Invalid pattern") }
}

apply_pattern.exact <- function(pattern, strings) {
            

An exact match is just a single string that matches on the nose.

                if (any(pattern$pattern == strings)) { pattern$pattern }
  else { character(0) }
}

apply_pattern.wildcard <- function(pattern, strings) {
            

First, replace all regex special characters with the correct backslashed version. I wish I could say I knew how many backslashes are necessary but it was trial and error. ;)

                pattern <- gsub("([]./\\*+()])", "\\\\\\1", pattern$pattern)

            

The only regex special characters we allow in wildcards are ^ and $ to mark beginning and ends of strings. The rest gets replaced with a .* prefix. For example, “abc” would be come “.*a.*b.*c”.

                pattern <- gsub("([^\\$^])", ".*\\1", pattern) # turn this into ctrl+p
    
            

But of course “.*a” is just “a”! So we turn that special sequence into just “”.

                pattern <- gsub("^.*", "^", pattern, fixed = TRUE)

            

By default, wildcards matching is case insensitive, since it will be used to filter on file names, and we rarely have file collisions based on case (and when you do you should think of a better file name instead!).

                grep(pattern, strings, value = TRUE, ignore.case = TRUE)
}

apply_pattern.partial <- function(pattern, strings) {
            

Just a plain substring match.

                grep(pattern$pattern, strings, fixed = TRUE, value = TRUE)
}

apply_pattern.regex <- function(pattern, strings) {
            

Just a plain regex match.

                grep(pattern$pattern, strings, value = TRUE)
}

apply_pattern.idempotence <- function(pattern, strings) {
  # TODO: (RK) Consider the string "."
            

For an overview of idempotence, see the documentation on the director exists method.

The idempotent pattern finds the helpers in a set of filenames and strips them. For example, c("foo.R", "bar/bar.R", "bar/baz.R") would be reduced to just c("foo.R", "bar/bar.R") (note that this pattern is not just a filter and has side effects).

Grab the indices of those files whose base name is the same as their enclosing directory name (for example, “foo/bar/bar.R”).

                idempotent <- vapply(strings, function(x) basename(x) == basename(dirname(x)), logical(1))

            

What are the actual directory names? (for example, “foo/bar”)

                idem_dirs  <- dirname(strings[idempotent])

            

Helper files are the files in the idem_dirs computed above who do not share their name with the parent directory. We need to find the indices of these files in our strings.

                helpers <- vapply(strings, function(x) dirname(x) %in% idem_dirs, logical(1))

            

Now replace the idempotent files with their directory names. In director, the name of an idempotent resource is the filename sans the basename (for example, “foo/bar” rather than “foo/bar/bar.R”).

                strings[idempotent] <- idem_dirs
    
            

Strip the helper files but keep the idempotent resources. Note that since the idempotent files, like “foo/bar/bar.R”, are within an idempotent directory, like “foo/bar”, they will be marked as TRUE in the helpers vector.

                strings[!helpers | idempotent]
}

            

tower.R

              
            
              #' Create a tower of functions.
#'
#' A tower is equivalent to the Ruby gem Rack's notion of middleware.
#'
#' Imagine a function \code{f1} that, in the middle of its processing,
#' calls another function, \code{f2}, which in the middle of its
#' processing, calls another function, \code{f3}, and so on.
#' 
#' To construct such a structure from the list of functions
#' \code{list(f1, f2, ...)} we need to be able to call the 
#' next "inner" function from the previous outer function.
#'
#' This is accomplished by providing a \code{yield} keyword
#' which simply calls the next function in the tower.
#'
#' The purpose of a tower is to modify some primary object
#' throughout its operation. To this end, an \code{object} keyword
#' will be provided to each tower function. If \code{object} is
#' modified prior to a \code{yield} call, the next function in
#' the tower will receive the modified object.
#'
#' For composability, every function in a tower should have a
#' \code{yield} keyword. The last function in the tower will
#' yield to an identity function that simply returns the \code{object}.
#'
#' @param functions list. A list of functions in the tower. The
#'    first argument of each function must be named "object" and
#'    each function must take a \code{...} parameter. By default
#'    \code{list()}, which creates an identity tower that performs
#'    no operation.
#' @return An S3 "tower" object, which is a callable function
#'    and must be passed the \code{object} as the first argument.
#'    Additional arguments will be passed to the first function
#'    in the tower.
#' @export
#' @examples
#' functions <- list(
#'   function(object, ...) {
#'     object <- object + 1
#'     object <- yield()
#'     object + 1
#'   },
#'
#'   function(object, ...) {
#'     object <- object * 2
#'     yield()
#'   }
#' )
#'
#' t <- tower(functions)
#' v <- t(1) # This is now 5, since in the tower, we increment object,
#'           # Multiply it by 2 in the next function, then increment it
#'           # again after receiving the previous function.
#' stopifnot(v == 5)
tower <- function(functions = list()) {
  stopifnot(is.list(functions),
            all(sapply(functions, is.function)))

  verify_function <- function(fn) {
    formal_names <- names(formals(fn))
    stopifnot(identical(formal_names[1], "object"))
    stopifnot(is.element("...", formal_names))
    stopifnot(is.identity2(fn) || is.element("yield", all.names(body(fn))))
  }
  lapply(functions, verify_function)

  inject <- function(fn) {
    yield <- function() {
      eval.parent(quote(
        `*NEXT.FUN*`(object, ...)
      ))
    }

    fn <- duplicate(fn)
    environment(fn) <- list2env(list(
      yield = yield
    ), parent = environment(fn))
    fn
  }

  # Inject the yield keyword to each function.
  functions <- lapply(functions, inject)
  functions[[length(functions) + 1]] <- identity2

  # Provide the next function.
  for (i in seq_len(length(functions) - 1)) {
    environment(functions[[i]])$`*NEXT.FUN*` <- functions[[i + 1]]
  }

  structure(functions[[1]], class = "tower")
}

`%>>%` <- function(lhs, rhs) {
  merge <- function(lhs, rhs) {
    # TODO: (RK) Use better heuristic here.
    if (!is.function(rhs)) {
      tower(as.pre_tower(lhs))(rhs)
    } else {
      as.pre_tower(list(lhs, rhs))
    }
  }

  merge(lhs, rhs)
}

as.pre_tower <- function(fn) {
  structure(c(recursive = TRUE, fn), class = "pre_tower")
}
is.pre_tower <- function(obj) is(obj, "pre_tower")

identity2 <- structure(function(object, ...) object, class = "identity")
is.identity2 <- function(x) is(x, "identity")

            

utils.R

              
            
              `%||%` <- function(x, y) if (is.null(x)) y else x
`%|||%` <- function(x, y) if (is.null(x) || isTRUE(is.na(x))) y else x

# A reference class that implements a stack data structure.
shtack <- methods::setRefClass('stack', list(elements = 'list'), methods = list(
  clear      = function()  { elements <<- list() },
  empty      = function()  { length(elements) == 0 },
  push       = function(x) { elements[[length(elements) + 1]] <<- x },
  peek       = function(n = 1)  {
    if (isTRUE(n)) return(elements)
    els <- seq(length(elements), length(elements) - n + 1)
    if (length(els) == 1) elements[[els]]
    else elements[els]
  },
  pop        = function()  {
    if (length(elements) == 0) stop("director:::stack is empty")
    tmp <- elements[[length(elements)]]
    elements[[length(elements)]] <<- NULL
    tmp
  },
  pop_all    = function()  { tmp <- elements; elements <<- list(); tmp }
))                                                                      

#' Whether or not a directory is an idempotent resource.
#'
#' By definition, this means the directory contains a file with the same name
#' (ignoring extension) as the directory.
#'
#' @param dir character. The directory to check.
#' @return \code{TRUE} or \code{FALSE} according as the directory is idempotent.
#'   There is no checking to ensure the directory exists.
#' @examples
#' \dontrun{
#'   # If we have a directory foo containing foo.R, then
#'   is.idempotent_directory('foo')
#'   # is TRUE, otherwise it's FALSE.
#' }
is.idempotent_directory <- function(dir) {
  # TODO: (RK) Case insensitivity in OSes that don't respect it, i.e. Windows?
  # TODO: (RK) File extensions besides .r and .R?
  extensionless_exists(file.path(dir, basename(dir)))
}

#' Determine whether an R file exists regardless of case of extension.
#'
#' @param filename character. The filename to test (possibly without extension).
#' @return \code{TRUE} or \code{FALSE} if the filename exists regardless of 
#'   R extension.
#' @examples
#' \dontrun{
#'  # Assume we have a file \code{"foo.R"}. The following all return \code{TRUE}.
#'  extensionless_exists('foo.R')
#'  extensionless_exists('foo.r')
#'  extensionless_exists('foo')
#' }
extensionless_exists <- function(filename) {
  file.exists(paste0(strip_r_extension(filename), '.r')) ||
  file.exists(paste0(strip_r_extension(filename), '.R')) 
  # Don't use the any + sapply trick because we can skip the latter check if the
  # former succeeds.
}

#' Complete the extension of a file (.r or .R).
#'
#' @note This function assumes at least one file ending in .r or .R exists.
#' @param name character. The filename sans extension.
#' @param base character. A base path to be prefixed to \code{name} when
#'   checking if the suffixed versions exist. The final returned string will
#'   not include this base.
#' @return \code{name} suffixed by ".r" or ".R" according to which exists.
#'   (Many Unix-based systems are extension case-sensitive).
#' @examples
#' \dontrun{
#'  # Assume we have a file \code{"foo.R"}.
#'  stopifnot(complete_extension("foo") == "foo.R")
#'
#'  # Assume we have a file \code{"bar.r"}.
#'  stopifnot(complete_extension("bar") == "bar.R")
#' }
complete_extension <- function(name, base = NULL) {
  upper_r <- paste0(name, ".R")
  filepath <- if (missing(base)) upper_r else file.path(base, upper_r)
  if (file.exists(filepath)) {
   upper_r 
  } else {
    paste0(name, ".r")
  }
}

#' Strip R extension.
#'
#' @param filename character. The filename to strip.
#' @return the filename without the '.r' or '.R' at the end.
strip_r_extension <- function(filename) {
  stopifnot(is.character(filename))
  gsub("\\.[rR]$", "", filename)
}

#' Strip a root file path from an absolute filename.
#'
#' @param root character. The root path.
#' @param filename character. The full file name.
#' @return the stripped file path.
#' @examples
#' \dontrun{
#'   stopifnot("test" == strip_root("foo/bar/test", "test"))
#' }
strip_root <- function(root, filename) {
  stopifnot(is.character(root) && is.character(filename))
  if (substring(filename, 1, nchar(root)) == root) {
    filename <- substring(filename, nchar(root) + 1, nchar(filename)) 
    gsub("^\\/*", "", filename)
  } else filename
}

#' Convert an idempotent resource name to a non-idempotent resource name.
#'
#' @param filename character. The filename to convert.
#' @return the non-idempotent filename.
drop_idempotence <- function(filename) {
  if (basename(dirname(filename)) == basename(filename))
    dirname(filename)
  else filename
}

#' Convert a filename to a resource name.
#'
#' @param filename character. The filename.
#' @return the resource name (i.e., stripped of idempotence and extension).
resource_name <- function(filename) {
  drop_idempotence(strip_r_extension(filename))
}

#' Get all helper files associated with an idempotent resource directory.
#'
#' @param path character. The *absolute* path of the idempotent resource.
#' @param ... additional parameters to pass to \code{list.files}.
#' @param leave_idempotent logical. Whether or not to leave the
#'   idempotent file (non-helper). By default \code{FALSE}.
#' @return a character list of relative helper paths.
#' @examples
#' \dontrun{
#'   # If we have a directory structure given by \code{"model/model.R"},
#'   # \code{"model/constants.R"}, \code{"model/functions.R"}, then the
#'   # below will return \code{c("constants.R", "functions.R")}.
#'   get_helpers("model")
#' }
get_helpers <- function(path, ..., leave_idempotent = FALSE) {
  helper_files <- list.files(path, pattern = '\\.[rR]$', ...)
  if (leave_idempotent) {
    helper_files
  } else {
    same_file <- which(vapply(helper_files, 
      function(f) basename(strip_r_extension(f)) == basename(path), logical(1)))
    helper_files[-same_file]
  }
}

#' Whether or not any substring of a string is any of a set of strings.
#'
#' @param string character.
#' @param set_of_strings character.
#' @return logical
#' @examples
#' stopifnot(director:::any_is_substring_of('test', c('blah', 'te', 'woo'))) # TRUE
#' stopifnot(!director:::any_is_substring_of('test', c('blah', 'woo'))) # FALSE
any_is_substring_of <- function(string, set_of_strings) {
  any(vapply(set_of_strings,
             function(x) substring(string, 1, nchar(x)) == x, logical(1)))
}

#' Enforce parameter types (logical, character, etc.).
#'
#' @param object ANY. An R object to enforce types on.
#' @param admissible_types character. A character vector of allowed types. 
#' @param function_name character. The function this enforcement is occurring
#'    in, for error messages.
#' @param name character. The name of the parameter whose type is being
#'    enforced. By default, the string expression passed in to the first
#'    argument, \code{object}.
#' @return Nothing, but error if the type does not match.
#' @examples
#' \dontrun{
#' x <- 1
#' enforce_type(x, "logical", "myfunction")
#' # Will call stop() with the following error:
#' # "In 'myfunction', the 'x' parameter must be a character; instead, I got
#' # a logical.
#' }
enforce_type <- function(object, admissible_types, function_name, name = deparse(substitute(object))) {
            

The is function takes parameters object and class, so this sneaky call is equivalent to

any(sapply(admissible_types, function(type) is(object, type)))
                if (!any(vapply(admissible_types, is, logical(1), object = object))) {
    stop(call. = FALSE, "In ", crayon::blue(function_name), ", the ",
         crayon::blue(name), " parameter must be a ",
         crayon::green(paste(admissible_types, collapse = " or ")),
         "; instead, I got a ", crayon::red(class(object)[1]), ".")
  }
}

#' A simple caching structure.
#'
#' @return A list of four methods \code{get}, \code{set}, \code{exists}
#'   and \code{unset} that modify another list under the hood.
simple_cache <- function() {
  cache <- list()
  list(
    get    = function(key) cache[[key]],
            

If instead we said cache[[key]] <<- value, NULL values would be cached incorrectly, since assigning NULL remove the key.

                  set    = function(key, value) cache[key] <<- structure(list(value), .Names = key),
    exists = function(key) is.element(key, names(cache)),
    unset  = function(key) cache[[key]] <<- NULL
  )
}

#' Duplicate a function object.
#'
#' @param original function.
#' @useDynLib director duplicate_
duplicate <- function(original) {
  .Call(duplicate_, original)
}

#' Append to a list or environment, overwriting if necessary.
#'
#' @param obj1. The object to be appended to.
#' @param obj2. The object to append.
#' @examples
#' \dontrun{
#'   x <- list(a = 1)
#'   x %<<% list(b = 2) # list(a = 1, b = 2)
#'   x %<<% list(a = 2) # list(a = 2)
#'   y <- list2env(x)
#'   y %<<% list(b = 2) # environment with a = 1 and b = 2
#'   y %<<% list2env(list(b = 2)) # same as above
#'   y %<<% list(a = 2) # environment with a = 2
#' }
`%<<%` <- function(obj1, obj2) {
  all_named <- function(x) { !is.null(names(x)) && all(nzchar(names(x))) }
  if (is.list(obj1)) stopifnot(all_named(obj1))
  if (is.list(obj2)) stopifnot(all_named(obj2))

  for (name in ls(obj2)) {
    obj1[[name]] <- obj2[[name]]
  }
  obj1
}

#' Queue with size limit.
#'
#' If you push more elements onto the queue than it has room for, they will
#' fall off screaming and wailing.
#'
#' @param size integer. Maximum number of elements in the queue.
#' @examples
#' \dontrun{
#'   q <- sized_queue(size = 2)
#'   q$push(1)
#'   q$get(1) # 1
#'   q$get(2) # NULL
#'   q$push(2)
#'   q$get(1) # 2
#'   q$get(2) # 1
#'   q$push(3)
#'   q$get(1) # 3
#'   q$get(2) # 2
#'   q$get(3) # NULL
#' }
sized_queue <- function(size) {
  length <- 0
  queue  <- vector('list', size)
  structure(class = "sized_queue", list(
    length = function() length,
    push = function(el) {
      queue  <<- append(list(el), queue)[seq_len(size)]
      length <<- min(length + 1, size)
      el
    },
    get = function(el) if (el > size) NULL else queue[[el]]
  ))
}
            

zzz.r

              
            
              .onAttach <- function(...) {
  
}